def validateEntry( self, entry ):
    """
    Check/validate the given Strongs Greek lexicon entry.

    entry is the XML "entry" element; it is accessed through its tag,
    items(), text and tail members and by iterating over its children.

    The collected information is stored in self.StrongsEntries, keyed by
    the text of the entry's <strongs> child. The stored dict may contain:
        'word': (greek, translit, beta) taken from the first <greek> child
        'pronunciation': the strongs attribute of the leading <pronunciation>
        'see': list of 'G'/'H'-prefixed numbers taken from <see> children
        'Entry': the remaining flattened text (only if there is any)

    Nothing is returned. Most structural expectations are asserts which
    only run when BibleOrgSysGlobals.debugFlag is set; unexpected
    attributes and elements are reported through logging.
    """
    if BibleOrgSysGlobals.debugFlag:
        assert entry.tag == "entry"

    BibleOrgSysGlobals.checkXMLNoText( entry, entry.tag, "na19" )
    BibleOrgSysGlobals.checkXMLNoTail( entry, entry.tag, "kaq9" )

    # Process the entry attributes first
    strongs5 = None
    for attrib,value in entry.items():
        if attrib == "strongs":
            strongs5 = value
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print( "Validating {} entry…".format( strongs5 ) )
        else:
            logging.warning( "Unprocessed {!r} attribute ({}) in main entry element".format( attrib, value ) )
    if BibleOrgSysGlobals.debugFlag:
        assert len(strongs5)==5 and strongs5.isdigit()

    entryResults = {}
    entryString = ""
    # True while we are still reading the leading strongs/greek/pronunciation
    #   elements; cleared when a second <greek> element is encountered.
    gettingEssentials = True
    for j, element in enumerate( entry ):
        if element.tag == "strongs":
            if BibleOrgSysGlobals.debugFlag:
                assert gettingEssentials and j==0 and element.text
            BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag, "md3d" )
            # Numbers 2717 and 3203..3302 are exempted from the no-tail check
            #   (presumably unused numbers whose entries carry text after the
            #   <strongs> element -- TODO confirm against the data).
            # The previous test "3203 > n > 3302" could never be true, so the
            #   check below was never executed for any entry.
            if strongs5!='02717' and not (3203 <= int(strongs5) <= 3302):
                BibleOrgSysGlobals.checkXMLNoTail( element, element.tag, "f3g7" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag, "m56g" )
            strongs = element.text
            if BibleOrgSysGlobals.debugFlag:
                assert strongs5.endswith( strongs )
            if element.tail and element.tail.strip():
                entryString += element.tail.strip()

        elif element.tag == "greek":
            location = "greek in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "jke0" )
            # No general tail check here: only the first/main <greek> is checked below
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "df35" )
            # Process the attributes
            translit = greek = beta = None
            for attrib,value in element.items():
                if attrib=="translit":
                    translit = value
                elif attrib=="unicode":
                    greek = value
                elif attrib=="BETA":
                    beta = value
                else:
                    logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if BibleOrgSysGlobals.debugFlag:
                assert greek and translit and beta
            if 'word' not in entryResults: # This is the first/main entry
                if BibleOrgSysGlobals.debugFlag:
                    assert gettingEssentials and j==1
                BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                entryResults['word'] = (greek, translit, beta)
            else: # A further greek word inside the running text
                if BibleOrgSysGlobals.debugFlag:
                    assert j > 2
                gettingEssentials = False
                entryString += ' ' + BibleOrgSysGlobals.getFlattenedXML( element, strongs5 )

        elif element.tag == "pronunciation":
            location = "pronunciation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
            # Process the attributes
            pronunciation = None
            for attrib,value in element.items():
                if attrib=="strongs":
                    pronunciation = value
                else:
                    logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if gettingEssentials:
                if BibleOrgSysGlobals.debugFlag:
                    assert j == 2
                    assert pronunciation
                    assert 'pronunciation' not in entryResults
                entryResults['pronunciation'] = pronunciation
            else:
                # For a later pronunciation element only its tail text is kept
                if BibleOrgSysGlobals.debugFlag:
                    assert j>2 and not gettingEssentials
                if element.tail and element.tail.strip():
                    entryString += element.tail.strip().replace( '\n', '' )

        elif element.tag == "strongs_derivation":
            location = "strongs_derivation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
            derivation = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert derivation and '\t' not in derivation and '\n' not in derivation
            entryString += derivation

        elif element.tag == "strongs_def":
            location = "strongs_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "jd28" )
            definition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert definition and '\t' not in definition and '\n' not in definition
            entryString += definition

        elif element.tag == "kjv_def":
            location = "kjv_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            # Neither the tail nor the subelements are checked for this element
            KJVdefinition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert KJVdefinition and '\t' not in KJVdefinition and '\n' not in KJVdefinition
            entryString += KJVdefinition

        elif element.tag == "strongsref":
            location = "strongsref in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "kls2" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "ks24" )
            strongsRef = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert strongsRef and '\t' not in strongsRef and '\n' not in strongsRef
            # Only the GREEK forms are rewritten here (both attribute orders).
            # Raw strings are used so that \d is not treated as a string escape.
            strongsRef = re.sub( r'<language="GREEK" strongs="(\d{1,5})">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
            strongsRef = re.sub( r'<strongs="(\d{1,5})" language="GREEK">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
            entryString += ' ' + strongsRef

        elif element.tag == "see":
            location = "see in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "kd02" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
            # Process the attributes
            seeLanguage = seeStrongsNumber = None
            for attrib,value in element.items():
                if attrib == "language":
                    seeLanguage = value
                elif attrib == "strongs":
                    seeStrongsNumber = value # Note: No leading zeroes here
                else:
                    logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if BibleOrgSysGlobals.debugFlag:
                assert seeLanguage and seeStrongsNumber and seeStrongsNumber.isdigit()
                assert seeLanguage in ('GREEK','HEBREW',)
            if 'see' not in entryResults:
                entryResults['see'] = []
            entryResults['see'].append( ('G' if seeLanguage=='GREEK' else 'H') + seeStrongsNumber )

        else:
            logging.error( "2d4f Unprocessed {!r} element ({}) in entry".format( element.tag, element.text ) )

    if entryString:
        if BibleOrgSysGlobals.debugFlag:
            assert '\t' not in entryString and '\n' not in entryString
        # Convert any empty strongsref elements still embedded in the flattened
        #   text (either attribute order, GREEK or HEBREW) to the compact form.
        entryString = re.sub( r'<strongsref language="GREEK" strongs="(\d{1,5})"></strongsref>', r'<StrongsRef>G\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="GREEK"></strongsref>', r'<StrongsRef>G\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref language="HEBREW" strongs="(\d{1,5})"></strongsref>', r'<StrongsRef>H\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="HEBREW"></strongsref>', r'<StrongsRef>H\1</StrongsRef>', entryString )
        if BibleOrgSysGlobals.debugFlag:
            assert 'strongsref' not in entryString
        entryResults['Entry'] = entryString

    # NOTE: strongs is only bound if a <strongs> child element was seen above
    self.StrongsEntries[strongs] = entryResults
def validateEntry( self, entry ):
    """
    Check/validate the given Strongs Greek lexicon entry.

    entry is the XML "entry" element; it is accessed through its tag,
    items(), text and tail members and by iterating over its children.

    The collected information is stored in self.StrongsEntries, keyed by
    the text of the entry's <strongs> child. The stored dict may contain:
        'word': (greek, translit, beta) taken from the first <greek> child
        'pronunciation': the strongs attribute of the leading <pronunciation>
        'see': list of 'G'/'H'-prefixed numbers taken from <see> children
        'Entry': the remaining flattened text (only if there is any)

    Nothing is returned. Most structural expectations are asserts which
    only run when BibleOrgSysGlobals.debugFlag is set; unexpected
    attributes and elements are reported through logging.
    """
    if BibleOrgSysGlobals.debugFlag:
        assert entry.tag == "entry"

    BibleOrgSysGlobals.checkXMLNoText( entry, entry.tag, "na19" )
    BibleOrgSysGlobals.checkXMLNoTail( entry, entry.tag, "kaq9" )

    # Process the entry attributes first
    strongs5 = None
    for attrib,value in entry.items():
        if attrib == "strongs":
            strongs5 = value
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print( "Validating {} entry...".format( strongs5 ) )
        else:
            logging.warning( "Unprocessed {!r} attribute ({}) in main entry element".format( attrib, value ) )
    if BibleOrgSysGlobals.debugFlag:
        assert len(strongs5)==5 and strongs5.isdigit()

    entryResults = {}
    entryString = ""
    # True while we are still reading the leading strongs/greek/pronunciation
    #   elements; cleared when a second <greek> element is encountered.
    gettingEssentials = True
    for j, element in enumerate( entry ):
        if element.tag == "strongs":
            if BibleOrgSysGlobals.debugFlag:
                assert gettingEssentials and j==0 and element.text
            BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag, "md3d" )
            # Numbers 2717 and 3203..3302 are exempted from the no-tail check
            #   (presumably unused numbers whose entries carry text after the
            #   <strongs> element -- TODO confirm against the data).
            # The previous test "3203 > n > 3302" could never be true, so the
            #   check below was never executed for any entry.
            if strongs5!='02717' and not (3203 <= int(strongs5) <= 3302):
                BibleOrgSysGlobals.checkXMLNoTail( element, element.tag, "f3g7" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag, "m56g" )
            strongs = element.text
            if BibleOrgSysGlobals.debugFlag:
                assert strongs5.endswith( strongs )
            if element.tail and element.tail.strip():
                entryString += element.tail.strip()

        elif element.tag == "greek":
            location = "greek in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "jke0" )
            # No general tail check here: only the first/main <greek> is checked below
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "df35" )
            # Process the attributes
            translit = greek = beta = None
            for attrib,value in element.items():
                if attrib=="translit":
                    translit = value
                elif attrib=="unicode":
                    greek = value
                elif attrib=="BETA":
                    beta = value
                else:
                    logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if BibleOrgSysGlobals.debugFlag:
                assert greek and translit and beta
            if 'word' not in entryResults: # This is the first/main entry
                if BibleOrgSysGlobals.debugFlag:
                    assert gettingEssentials and j==1
                BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                entryResults['word'] = (greek, translit, beta)
            else: # A further greek word inside the running text
                if BibleOrgSysGlobals.debugFlag:
                    assert j > 2
                gettingEssentials = False
                entryString += ' ' + BibleOrgSysGlobals.getFlattenedXML( element, strongs5 )

        elif element.tag == "pronunciation":
            location = "pronunciation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
            # Process the attributes
            pronunciation = None
            for attrib,value in element.items():
                if attrib=="strongs":
                    pronunciation = value
                else:
                    logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if gettingEssentials:
                if BibleOrgSysGlobals.debugFlag:
                    assert j == 2
                    assert pronunciation
                    assert 'pronunciation' not in entryResults
                entryResults['pronunciation'] = pronunciation
            else:
                # For a later pronunciation element only its tail text is kept
                if BibleOrgSysGlobals.debugFlag:
                    assert j>2 and not gettingEssentials
                if element.tail and element.tail.strip():
                    entryString += element.tail.strip().replace( '\n', '' )

        elif element.tag == "strongs_derivation":
            location = "strongs_derivation in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
            derivation = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert derivation and '\t' not in derivation and '\n' not in derivation
            entryString += derivation

        elif element.tag == "strongs_def":
            location = "strongs_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "jd28" )
            definition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert definition and '\t' not in definition and '\n' not in definition
            entryString += definition

        elif element.tag == "kjv_def":
            location = "kjv_def in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoAttributes( element, location, "jke0" )
            # Neither the tail nor the subelements are checked for this element
            KJVdefinition = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert KJVdefinition and '\t' not in KJVdefinition and '\n' not in KJVdefinition
            entryString += KJVdefinition

        elif element.tag == "strongsref":
            location = "strongsref in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "kls2" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "ks24" )
            strongsRef = BibleOrgSysGlobals.getFlattenedXML( element, strongs5 ).replace( '\n', '' )
            if BibleOrgSysGlobals.debugFlag:
                assert strongsRef and '\t' not in strongsRef and '\n' not in strongsRef
            # Only the GREEK forms are rewritten here (both attribute orders).
            # Raw strings are used so that \d is not treated as a string escape.
            strongsRef = re.sub( r'<language="GREEK" strongs="(\d{1,5})">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
            strongsRef = re.sub( r'<strongs="(\d{1,5})" language="GREEK">', r'<StrongsRef>G\1</StrongsRef>', strongsRef )
            entryString += ' ' + strongsRef

        elif element.tag == "see":
            location = "see in Strongs " + strongs5
            BibleOrgSysGlobals.checkXMLNoText( element, location, "iw9k" )
            BibleOrgSysGlobals.checkXMLNoTail( element, location, "kd02" )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "0s20" )
            # Process the attributes
            seeLanguage = seeStrongsNumber = None
            for attrib,value in element.items():
                if attrib == "language":
                    seeLanguage = value
                elif attrib == "strongs":
                    seeStrongsNumber = value # Note: No leading zeroes here
                else:
                    logging.warning( "scs4 Unprocessed {!r} attribute ({}) in {}".format( attrib, value, location ) )
            if BibleOrgSysGlobals.debugFlag:
                assert seeLanguage and seeStrongsNumber and seeStrongsNumber.isdigit()
                assert seeLanguage in ('GREEK','HEBREW',)
            if 'see' not in entryResults:
                entryResults['see'] = []
            entryResults['see'].append( ('G' if seeLanguage=='GREEK' else 'H') + seeStrongsNumber )

        else:
            logging.error( "2d4f Unprocessed {!r} element ({}) in entry".format( element.tag, element.text ) )

    if entryString:
        if BibleOrgSysGlobals.debugFlag:
            assert '\t' not in entryString and '\n' not in entryString
        # Convert any empty strongsref elements still embedded in the flattened
        #   text (either attribute order, GREEK or HEBREW) to the compact form.
        entryString = re.sub( r'<strongsref language="GREEK" strongs="(\d{1,5})"></strongsref>', r'<StrongsRef>G\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="GREEK"></strongsref>', r'<StrongsRef>G\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref language="HEBREW" strongs="(\d{1,5})"></strongsref>', r'<StrongsRef>H\1</StrongsRef>', entryString )
        entryString = re.sub( r'<strongsref strongs="(\d{1,5})" language="HEBREW"></strongsref>', r'<StrongsRef>H\1</StrongsRef>', entryString )
        if BibleOrgSysGlobals.debugFlag:
            assert 'strongsref' not in entryString
        entryResults['Entry'] = entryString

    # NOTE: strongs is only bound if a <strongs> child element was seen above
    self.StrongsEntries[strongs] = entryResults