import hashlib
import re
import sys

# SQLUtility, LPTSExtractReader, VersesReader, UpdateDBPDatabase, and the
# SOURCE_DATABASE / TARGET_ASSET_ID globals used below are expected to be
# provided by other modules in this repository.


# Copies text_plain filesets and their verses from SOURCE_DATABASE into the
# target bible_filesets / bible_verses tables, and manages the fulltext index
# on verse_text.
class UpdateDBPVerseTable:

    def __init__(self, config):
        self.db = SQLUtility(config)
        self.OT = self.db.selectSet("SELECT id FROM books WHERE book_testament = 'OT'", ())
        self.NT = self.db.selectSet("SELECT id FROM books WHERE book_testament = 'NT'", ())
        self.statements = []

    def loadVerseTable(self):
        sql = "SELECT id, hash_id FROM %s.bible_filesets WHERE set_type_code = 'text_plain'" % (SOURCE_DATABASE)
        filesetIdMap = self.db.selectMap(sql, ())
        print(len(filesetIdMap.keys()), "verse filesets found")
        for filesetId in sorted(filesetIdMap.keys()):
            hashId = filesetIdMap[filesetId]
            print(filesetId)
            self.statements = []
            sql = "SELECT distinct book_id FROM " + SOURCE_DATABASE + ".bible_verses WHERE hash_id = %s"
            bookIdList = self.db.selectSet(sql, (hashId,))
            if len(bookIdList) == 0:
                print("WARNING: plain_text filesetId %s has no verses." % (filesetId))
            else:
                otBooks = bookIdList.intersection(self.OT)
                ntBooks = bookIdList.intersection(self.NT)
                setSizeCode = UpdateDBPDatabase.getSetSizeCode(ntBooks, otBooks)
                bucket = TARGET_ASSET_ID
                setTypeCode = 'text_plain'
                newHashId = UpdateDBPDatabase.getHashId(bucket, filesetId, setTypeCode)
                sql = ("INSERT INTO bible_filesets(id, hash_id, asset_id, set_type_code,"
                       " set_size_code, hidden) VALUES (%s, %s, %s, %s, %s, 0)")
                values = (filesetId, newHashId, bucket, setTypeCode, setSizeCode)
                self.statements.append((sql, [values]))
                sql = ("SELECT book_id, chapter, verse_start, verse_end, verse_text FROM "
                       + SOURCE_DATABASE + ".bible_verses WHERE hash_id = %s")
                resultSet = self.db.select(sql, (hashId,))
                values = []
                for row in resultSet:
                    values.append((newHashId,) + row)
                sql = ("INSERT INTO bible_verses (hash_id, book_id, chapter, verse_start, verse_end, verse_text)"
                       " VALUES (%s, %s, %s, %s, %s, %s)")
                self.statements.append((sql, values))
                self.db.executeTransaction(self.statements)

    def dropIndex(self):
        sql = "ALTER TABLE bible_verses DROP INDEX verse_text"
        self.db.execute(sql, ())

    def addIndex(self):
        sql = "ALTER TABLE bible_verses ADD FULLTEXT INDEX verse_text (verse_text)"
        self.db.execute(sql, ())
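# Hypothetical usage sketch (not in the original file): shows the intended call
# order for rebuilding the verse table. The Config class and the way it is
# constructed are assumptions; only the drop-index / load / re-add-index
# sequence is taken from the methods above.
#
# if __name__ == "__main__":
#     config = Config()
#     update = UpdateDBPVerseTable(config)
#     update.dropIndex()        # drop the fulltext index before bulk loading
#     update.loadVerseTable()   # copy text_plain filesets and verses from SOURCE_DATABASE
#     update.addIndex()         # restore the fulltext index on verse_text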
# Clones an existing bible_filesets row and its bible_files rows under a new
# fileset id formed by appending the bitrate to the original id.
class CreateDAFileset:

    def __init__(self):
        self.db = SQLUtility("localhost", 3306, "root", "hls_dbp")

    def process(self, filesetId, bitrate):
        resultSet = self.db.select(
            "SELECT hash_id, asset_id, set_type_code, set_size_code, hidden"
            " FROM bible_filesets WHERE id = %s", (filesetId,))
        row = resultSet[0]
        hashId = row[0]
        assetId = row[1]
        setTypeCode = row[2]
        setSizeCode = row[3]
        hidden = row[4]
        newFilesetId = filesetId + bitrate
        newHashId = self.getHashId(assetId, newFilesetId, setTypeCode)
        sql = ("INSERT INTO bible_filesets (id, hash_id, asset_id, set_type_code, set_size_code, hidden)"
               " VALUES (%s, %s, %s, %s, %s, %s)")
        self.db.execute(sql, (newFilesetId, newHashId, assetId, setTypeCode, setSizeCode, hidden))
        sql = ("INSERT INTO bible_files (hash_id, book_id, chapter_start, chapter_end, verse_start, verse_end,"
               " file_name, file_size, duration)"
               " SELECT %s, book_id, chapter_start, chapter_end, verse_start, verse_end,"
               " file_name, file_size, duration FROM bible_files WHERE hash_id = %s")
        self.db.execute(sql, (newHashId, hashId))

    def getHashId(self, bucket, filesetId, setTypeCode):
        md5 = hashlib.md5()
        md5.update(filesetId.encode("latin1"))
        md5.update(bucket.encode("latin1"))
        md5.update(setTypeCode.encode("latin1"))
        hash_id = md5.hexdigest()
        return hash_id[:12]
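# Hypothetical usage sketch (not in the original file): CreateDAFileset copies
# an existing fileset row and its bible_files rows under a new id that has the
# bitrate appended. The fileset id and bitrate values below are illustrative only.
#
# if __name__ == "__main__":
#     creator = CreateDAFileset()
#     creator.process("ENGESVN2DA", "16")   # produces fileset id "ENGESVN2DA16"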
# Builds an "analysis" database comparing the fileset ids found in the bucket
# listings, LPTS, and DBP, then creates diff views and mismatch reports.
class IdAnalysis:

    def __init__(self, config):
        self.config = config
        self.out = SQLUtility(config.database_host, config.database_port,
                              config.database_user, "analysis")

    def createAnalysisBucket(self):
        self.privateCreateTable("buckets")
        sql = "SELECT distinct fileset_id, bible_id, type_code, bucket FROM bucket_listing"
        db = SQLUtility(self.config.database_host, self.config.database_port,
                        self.config.database_user, "valid_dbp")
        resultSet = db.select(sql, None)
        results = self.privateTransformType(resultSet)
        db.close()
        verses = VersesReader(self.config)
        verseIds = verses.bibleIdFilesetId()
        for verseId in verseIds:
            parts = verseId.split("/")
            results.append([parts[1], parts[0], "text_plain", "verses"])
        print("num %d insert in buckets" % (len(results)))
        self.out.executeBatch("INSERT INTO buckets VALUES (%s, %s, %s, %s)", results)

    def createAnalysisBucketNo16(self):
        self.out.execute("DROP VIEW IF EXISTS buckets_no16", None)
        sql = "CREATE VIEW buckets_no16 AS SELECT * FROM buckets WHERE right(fileset_id, 2) != '16'"
        self.out.execute(sql, None)

    def createAnalysisLPTS(self):
        self.privateCreateTable("lpts")
        results = []
        reader = LPTSExtractReader(self.config)
        self.privateAddFilesetId(results, reader.getAudioMap())
        self.privateAddFilesetId(results, reader.getTextMap())
        self.privateAddFilesetId(results, reader.getVideoMap())
        print("num %d insert in lpts" % (len(results)))
        self.out.executeBatch("INSERT INTO lpts VALUES (%s, %s, %s)", results)

    def privateAddFilesetId(self, results, filesetMap):
        for filesetId in filesetMap.keys():
            fileset = filesetMap[filesetId]
            if fileset.Reg_StockNumber() != None:
                stock_num = fileset.Reg_StockNumber()
            else:
                stock_num = "unknown"
            if fileset.DBP_Equivalent() != None:
                results.append([filesetId, fileset.DBP_Equivalent(), stock_num])
            if fileset.DBP_Equivalent2() != None:
                results.append([filesetId, fileset.DBP_Equivalent2(), stock_num])
            if fileset.DBP_Equivalent() == None and fileset.DBP_Equivalent2() == None:
                results.append([filesetId, "unknown", stock_num])

    def createAnalysisDBPOnly(self):
        self.privateCreateTable("dbp_only")
        sql = ("SELECT f.id, c.bible_id, f.set_type_code, f.asset_id"
               " FROM bible_filesets f, bible_fileset_connections c"
               " WHERE f.hash_id = c.hash_id")
        db = SQLUtility(self.config.database_host, self.config.database_port,
                        self.config.database_user, "dbp_only")
        results = db.select(sql, None)
        db.close()
        print("num %d insert in dbp_only" % (len(results)))
        self.out.executeBatch("INSERT INTO dbp_only VALUES (%s, %s, %s, %s)", results)

    def createAnalysisDBP(self):
        self.privateCreateTable("dbp")
        sql = ("SELECT f.id, c.bible_id, f.set_type_code, f.asset_id"
               " FROM bible_filesets f, bible_fileset_connections c"
               " WHERE f.hash_id = c.hash_id")
        db = SQLUtility(self.config.database_host, self.config.database_port,
                        self.config.database_user, "dbp")
        results = db.select(sql, None)
        db.close()
        print("num %d insert in dbp" % (len(results)))
        self.out.executeBatch("INSERT INTO dbp VALUES (%s, %s, %s, %s)", results)

    def createDiffViews(self):
        self.privateCreateDiffView("IN_buckets_NOT_IN_dbp_only_DIFF_bible_fileset",
                                   "buckets", "dbp_only", ["bible_id", "fileset_id"])
        self.privateCreateDiffView("IN_buckets_NOT_IN_dbp_DIFF_bible_fileset",
                                   "buckets", "dbp", ["bible_id", "fileset_id"])
        self.privateCreateDiffView("IN_buckets_NOT_IN_lpts_DIFF_bible_fileset",
                                   "buckets_no16", "lpts", ["bible_id", "fileset_id"])
        self.privateCreateDiffView("IN_dbp_only_NOT_IN_buckets_DIFF_bible_fileset",
                                   "dbp_only", "buckets", ["bible_id", "fileset_id"])
        self.privateCreateDiffView("IN_dbp_NOT_IN_buckets_DIFF_bible_fileset",
                                   "dbp", "buckets", ["bible_id", "fileset_id"])
        self.privateCreateDiffView("IN_lpts_NOT_IN_buckets_DIFF_bible_fileset",
                                   "lpts", "buckets_no16", ["bible_id", "fileset_id"])
        self.privateCreateDiffView("IN_buckets_NOT_IN_dbp_only_DIFF_fileset",
                                   "buckets", "dbp_only", ["fileset_id"])
        self.privateCreateDiffView("IN_buckets_NOT_IN_dbp_DIFF_fileset",
                                   "buckets", "dbp", ["fileset_id"])
        self.privateCreateDiffView("IN_buckets_NOT_IN_lpts_DIFF_fileset",
                                   "buckets_no16", "lpts", ["fileset_id"])
        self.privateCreateDiffView("IN_dbp_only_NOT_IN_buckets_DIFF_fileset",
                                   "dbp_only", "buckets", ["fileset_id"])
        self.privateCreateDiffView("IN_dbp_NOT_IN_buckets_DIFF_fileset",
                                   "dbp", "buckets", ["fileset_id"])
        self.privateCreateDiffView("IN_lpts_NOT_IN_buckets_DIFF_fileset",
                                   "lpts", "buckets_no16", ["fileset_id"])

    def close(self):
        self.out.close()

    def privateCreateTable(self, tableName):
        sql = "DROP TABLE IF EXISTS %s" % (tableName)
        self.out.execute(sql, None)
        if tableName == "lpts":
            sql = ("CREATE TABLE lpts ("
                   " fileset_id varchar(255) not null,"
                   " bible_id varchar(255) not null,"
                   " stock_id varchar(255) not null,"
                   " PRIMARY KEY(fileset_id, bible_id))")
        else:
            create = "CREATE TABLE %s (" % tableName
            sql = (create +
                   " fileset_id varchar(255) not null,"
                   " bible_id varchar(255) not null,"
                   " type_code varchar(255) not null,"
                   " bucket varchar(255) not null,"
                   " PRIMARY KEY(fileset_id, bible_id, type_code, bucket))")
        print("SQL:", sql)
        self.out.execute(sql, None)

    def privateCreateDiffView(self, viewName, selectTable, compareTable, whereColumns):
        sql = "DROP VIEW IF EXISTS %s" % (viewName)
        self.out.execute(sql, None)
        parts = []
        for col in whereColumns:
            parts.append("a.%s=b.%s" % (col, col))
        joinClause = " AND ".join(parts)
        sql = ("CREATE VIEW %s AS SELECT a.* FROM %s a WHERE NOT EXISTS"
               " (SELECT 1 from %s b WHERE %s) ORDER BY fileset_id, bible_id"
               % (viewName, selectTable, compareTable, joinClause))
        self.out.execute(sql, None)

    def privateTransformType(self, resultSet):
        results = []
        for row in resultSet:
            row = list(row)
            typeCode = row[2]
            if typeCode == "app":
                setTypeCode = "app"
            elif typeCode == "audio":
                filesetId = row[0]
                code = filesetId[7:9]
                if code == "1D":
                    setTypeCode = "audio"
                elif code == "2D":
                    setTypeCode = "audio_drama"
                else:
                    code = filesetId[8:10]
                    if code == "1D":
                        setTypeCode = "audio"
                    elif code == "2D":
                        setTypeCode = "audio_drama"
                    elif filesetId == "N1TUVDPI":
                        setTypeCode = "audio"
                    elif filesetId == "O1TUVDPI":
                        setTypeCode = "audio"
                    else:
                        print("WARNING: file type not known for %s, set_type_code set to 'unknown'" % (filesetId))
                        setTypeCode = "unknown"
            elif typeCode == "text":
                setTypeCode = "text_format"
            elif typeCode == "video":
                setTypeCode = "video_stream"
            elif typeCode == "verse":
                setTypeCode = "text_plain"
            else:
                print("ERROR typeCode '%s' is not known" % (typeCode))
                sys.exit()
            row[2] = setTypeCode
            results.append(row)
        return results

    def privateTransformType_obsolete(self, resultSet):
        results = []
        for row in resultSet:
            row = list(row)
            setTypeCode = row[2]
            if setTypeCode == "text_plain":
                typeCode = "verse"
            elif setTypeCode == "text_format":
                typeCode = "text"
            elif setTypeCode[:5] == "audio":
                typeCode = "audio"
            elif setTypeCode == "video_stream":
                typeCode = "video"
            else:
                typeCode = setTypeCode
            row[2] = typeCode
            results.append(row)
        return results

    def analyzeLPTSMismatches(self):
        lpts = LPTSExtractReader(self.config)
        bibleMap = lpts.getBibleIdMap()
        print("num bibleIds: %d" % (len(bibleMap.keys())))
        filesetMap = lpts.getAllFilesetMap()
        print("num filesetIds: %d" % (len(filesetMap.keys())))
        finalResults = []
        results = self.out.select("SELECT * FROM in_buckets_not_in_lpts_diff_bible_fileset", None)
        for row in results:
            filesetId = row[0]
            bibleId = row[1]
            typeCode = row[2]
            bucket = row[3]
            bibleLPTS = bibleMap.get(bibleId)
            filesetLPTS = filesetMap.get(filesetId)
            if bibleLPTS != None:
                bibleStockNum = bibleLPTS.Reg_StockNumber()
            else:
                bibleStockNum = None
            if filesetLPTS != None:
                filesetStockNum = filesetLPTS.Reg_StockNumber()
            else:
                filesetStockNum = None
            finalResults.append([filesetId, bibleId, typeCode, bucket, filesetStockNum, bibleStockNum])
            print(row, "filesetId StockNum: %s, bibleId StockNum: %s" % (filesetStockNum, bibleStockNum))
        self.out.execute("DROP TABLE IF EXISTS lpts_errors", None)
        sql = ("CREATE TABLE lpts_errors ("
               " fileset_id varchar(255) not null,"
               " bible_id varchar(255) not null,"
               " type_code varchar(255) not null,"
               " bucket varchar(255) not null,"
               " fileset_stock_num varchar(255) null,"
               " bible_stock_num varchar(255) null,"
               " PRIMARY KEY(fileset_id, bible_id, type_code, bucket))")
        self.out.execute(sql, None)
        self.out.executeBatch("INSERT INTO lpts_errors VALUES (%s, %s, %s, %s, %s, %s)", finalResults)

    def analyzeDBPMismatches(self):
        finalResults = []
        dbpAbsentees = self.out.select("SELECT * FROM in_buckets_not_in_dbp_diff_bible_fileset", None)
        for dbpAbsent in dbpAbsentees:
            filesetId = dbpAbsent[0]
            bibleId = dbpAbsent[1]
            typeCode = dbpAbsent[2]
            bucket = dbpAbsent[3]
            typeCd = typeCode.split("_")[0]
            #print(bucket, "%s/%s/%s/" % (typeCd, bibleId, filesetId))  # manually check in buckets
            bibles = self.out.select("SELECT * FROM dbp.bibles WHERE id=%s", (bibleId,))
            biblesCount = len(bibles)
            biblesCon = self.out.select(
                "SELECT * FROM dbp.bible_fileset_connections WHERE bible_id=%s", (bibleId,))
            biblesConCount = len(biblesCon)
            biblesTblMsg = "absent" if biblesCount == 0 else ""
            biblesConnTblMsg = "absent" if biblesConCount == 0 else ""
            filesetTblMsg = ""
            filesetBktMsg = ""
            filesetTypeMsg = ""
            filesetConnBibleIdMsg = ""
            filesetIds = self.out.select("SELECT * FROM dbp.bible_filesets WHERE id=%s", (filesetId,))
            filesetIdCount = len(filesetIds)
            if filesetIdCount == 0:
                filesetMsg = "not in DBP"
                filesetTblMsg = "absent"
            else:
                filesetType = self.out.select(
                    "SELECT * FROM dbp.bible_filesets WHERE id=%s AND set_type_code=%s",
                    (filesetId, typeCode))
                filesetTypeCount = len(filesetType)
                filesetBkt = self.out.select(
                    "SELECT * FROM dbp.bible_filesets WHERE id=%s AND asset_id=%s",
                    (filesetId, bucket))
                filesetBktCount = len(filesetBkt)
                filesetAll = self.out.select(
                    "SELECT * FROM dbp.bible_filesets WHERE id=%s AND asset_id=%s AND set_type_code=%s",
                    (filesetId, bucket, typeCode))
                filesetAllCount = len(filesetAll)
                if filesetTypeCount > 0 and filesetBktCount == 0:
                    buckets = []
                    for row in filesetType:
                        buckets.append(row[2])
                    filesetMsg = "DBP has fileset in %s" % (",".join(buckets))
                    filesetBktMsg = ",".join(buckets)
                elif filesetTypeCount == 0 and filesetBktCount > 0:
                    types = []
                    for row in filesetBkt:
                        types.append(row[3])
                    filesetMsg = "DBP has fileset as %s" % (",".join(types))
                    filesetTypeMsg = ",".join(types)
                elif filesetAllCount > 0:
                    hashId = filesetAll[0][1]
                    matchBibles = self.out.select(
                        "SELECT * from dbp.bible_fileset_connections WHERE hash_id=%s", (hashId,))
                    #print(matchBibles)
                    mismatchBibles = []
                    for row in matchBibles:
                        mismatchBibles.append(row[1])
                    filesetMsg = "bible_fileset_connections has bible_id %s" % (",".join(mismatchBibles))
                    filesetConnBibleIdMsg = ", ".join(mismatchBibles)
                else:
                    filesetMsg = "PUNT"
            #print(bucket, typeCode, bibleId, filesetId, "%s filesetType: %d filesetBkt: %d filesetAll: %d"
            #    % (bibleMsg, filesetTypeCount, filesetBktCount, filesetAllCount))
            #filesetConCount = self.out.selectScalar("SELECT count(*) FROM dbp.bible_fileset_connections WHERE")
            print(bucket, typeCode, bibleId, filesetId,
                  "%s %s %s %s %s %s" % (biblesTblMsg, biblesConnTblMsg, filesetTblMsg,
                                         filesetBktMsg, filesetTypeMsg, filesetConnBibleIdMsg))
            finalResults.append([filesetId, bibleId, typeCode, bucket, biblesTblMsg, biblesConnTblMsg,
                                 filesetTblMsg, filesetBktMsg, filesetTypeMsg, filesetConnBibleIdMsg])
        self.out.execute("DROP TABLE IF EXISTS dbp_errors", None)
        sql = ("CREATE TABLE dbp_errors ("
               " fileset_id varchar(255) not null,"
               " bible_id varchar(255) not null,"
               " type_code varchar(255) not null,"
               " bucket varchar(255) not null,"
               " dbp_bibles varchar(255) null,"
               " dbp_bible_fileset_cons varchar(255) null,"
               " dbp_bible_filesets varchar(255) null,"
               " dbp_asset_id varchar(255) null,"
               " dbp_set_type_code varchar(255) null,"
               " dbp_conn_bible_id varchar(255) null,"
               " PRIMARY KEY(fileset_id, bible_id, type_code, bucket))")
        self.out.execute(sql, None)
        self.out.executeBatch(
            "INSERT INTO dbp_errors VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
            finalResults)
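# Hypothetical usage sketch (not in the original file): the analysis tables are
# built first, then the diff views, and finally the mismatch reports. The Config
# construction is an assumption; the method order follows the dependencies above
# (the in_*_not_in_* views read from the buckets/lpts/dbp_only/dbp tables).
#
# if __name__ == "__main__":
#     config = Config()
#     analysis = IdAnalysis(config)
#     analysis.createAnalysisBucket()
#     analysis.createAnalysisBucketNo16()
#     analysis.createAnalysisLPTS()
#     analysis.createAnalysisDBPOnly()
#     analysis.createAnalysisDBP()
#     analysis.createDiffViews()
#     analysis.analyzeLPTSMismatches()
#     analysis.analyzeDBPMismatches()
#     analysis.close()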
# Compares DBP's bible_fileset_copyright_organizations against the LPTS
# copyright and licensor fields, reporting filesets whose organization does
# not match.
class Organizations:

    def __init__(self, config):
        self.db = SQLUtility(config)
        self.db.execute("use dbp", ())
        count = self.db.selectScalar("select count(*) from bibles", ())
        print("count", count)
        self.textCopyrightSet = set()
        self.audioCopyrightSet = set()
        self.videoCopyrightSet = set()
        self.licensorSet = set()
        self.coLicensorSet = set()
        self.stripChars = r"[.,©℗\d/\-_\(\)]"

    def process(self, lptsReader):
        orgIdMap = {}
        ## This logic assumes there are no names with multiple org ids
        resultSet = self.db.select("SELECT name, organization_id FROM organization_translations", ())
        for row in resultSet:
            name = row[0].lower()
            #name = name.replace("la ", "")
            #name = name.replace(" ", "")
            #name = name.replace("the", "")
            #name = row[0].replace("the")
            #name = row[0].strip()
            orgIdMap[name.strip()] = row[1]
        self.possibleMapList = self.db.selectMapList(
            "SELECT organization_id, name FROM organization_translations", ())
        sql = ("SELECT bf.id AS fileset_id, bf.hash_id, bf.set_type_code, bfco.organization_role,"
               " o.slug, o.id AS organization_id, bfc.bible_id"
               " FROM bible_filesets bf"
               " JOIN bible_fileset_copyright_organizations bfco ON bfco.hash_id = bf.hash_id"
               " JOIN organizations o ON bfco.organization_id = o.id"
               " JOIN bible_fileset_connections bfc ON bfc.hash_id = bf.hash_id"
               " ORDER BY bf.id, bf.hash_id")
        resultSet = self.db.select(sql, ())
        matchCount = 0
        unMatchCount = 0
        skipCount = 0
        for row in resultSet:
            filesetId = row[0]
            hashId = row[1]
            setTypeCode = row[2]
            typeCode = setTypeCode.split("_")[0]
            organizationRole = row[3]
            slug = row[4]
            organizationId = row[5]
            #name = row[6]
            bibleId = row[6]
            if typeCode in {"audio", "text", "video"}:
                #print(filesetId, hashId, typeCode, organizationRole, slug, organizationId, bibleId)
                (record, index, status) = lptsReader.getLPTSRecord(typeCode, bibleId, filesetId)
                if record != None:
                    textCopyright = record.Copyrightc()
                    audioCopyright = record.Copyrightp()
                    videoCopyright = record.Copyright_Video()
                    licensor = record.Licensor()
                    coLicensor = record.CoLicensor()
                    creativeCommons = record.CreativeCommonsText()
                    electronicPub = record.ElectronicPublisher(index)
                    if typeCode == "audio" and organizationRole == 1:
                        orgName = self.parseCopyright(record.Copyrightp())
                        self.addAudioCopyright(record.Copyrightp())
                    elif typeCode == "audio" and organizationRole == 2:
                        orgName = self.parseLicensor(record.Licensor())
                        self.addLicensor(record.Licensor())
                    elif typeCode == "audio" and organizationRole == 3:
                        orgName = "I don't know"
                    elif typeCode == "text" and organizationRole == 1:
                        orgName = self.parseCopyright(record.Copyrightc())
                        self.addTextCopyright(record.Copyrightc())
                    elif typeCode == "text" and organizationRole == 2:
                        orgName = self.parseLicensor(record.Licensor())
                        self.addLicensor(record.Licensor())
                    elif typeCode == "text" and organizationRole == 3:
                        orgName = "I don't know"
                    elif typeCode == "video" and organizationRole == 1:
                        orgName = self.parseCopyright(record.Copyright_Video())
                        self.addVideoCopyright(record.Copyright_Video())
                    elif typeCode == "video" and organizationRole == 2:
                        orgName = self.parseLicensor(record.Licensor())
                        self.addLicensor(record.Licensor())
                    elif typeCode == "video" and organizationRole == 3:
                        orgName = "I don't know"
                    else:
                        print("ERROR: unexpected typeCode", typeCode)
                        sys.exit()
                    orgId = self.matchName(orgIdMap, orgName)
                    if orgId != None and orgId == organizationId:
                        matchCount += 1
                    else:
                        unMatchCount += 1
                        print("DBP: %s/%s/%s hash:%s, role: %s, orgId: %s, orgSlug: %s"
                              % (typeCode, bibleId, filesetId, hashId, organizationRole, organizationId, slug))
                        if orgId != None:
                            print("\tOrg Lookup Result: %d found for %s" % (orgId, orgName))
                        else:
                            print("\tOrg Lookup Result: No orgId for %s" % (orgName))
                        if textCopyright != None:
                            print("\tLPTS Copyrightc:", textCopyright)
                        if audioCopyright != None:
                            print("\tLPTS Copyrightp:", audioCopyright)
                        if videoCopyright != None:
                            print("\tLPTS Copyright_Video:", videoCopyright)
                        if licensor != None:
                            print("\tLPTS Licensor:", licensor)
                        if coLicensor != None:
                            print("\tLPTS CoLicensor:", coLicensor)
                        if creativeCommons != None:
                            print("\tLPTS CreativeCommonsText:", creativeCommons)
                        if electronicPub != None:
                            print("\tLPTS ElectronicPublisher:", electronicPub)
                        for possibleName in self.possibleMapList[organizationId]:
                            print("\torganization_translations.name for %s: %s" % (organizationId, possibleName))
                else:
                    #print("\t*** NO LPTS")
                    skipCount += 1
            else:
                #print("\t SKIP typecode", typeCode)
                skipCount += 1
        print("total=", len(resultSet), " matched=", matchCount,
              " unmatched=", unMatchCount, " skipped=", skipCount)
        for copyright in sorted(self.textCopyrightSet):
            print("text: |%s|" % (copyright))
        for copyright in sorted(self.audioCopyrightSet):
            print("audio: |%s|" % (copyright))
        for copyright in sorted(self.videoCopyrightSet):
            print("video: |%s|" % (copyright))
        for licensor in sorted(self.licensorSet):
            print("licensor: |%s|" % (licensor))

    def parseCopyright(self, copyright):
        if copyright != None:
            return copyright.lower()
        else:
            return None

    def parseLicensor(self, licensor):
        print("** input ", licensor)
        if licensor != None:
            parts = licensor.lower().split(",", 2)
            if len(parts) > 1:
                print("** output", parts[1] + " " + parts[0])
                return parts[1] + " " + parts[0]
            else:
                return parts[0]
        return None

    def matchName(self, orgIdMap, orgName):
        if orgName != None:
            if orgName == "sil":
                return 30
            for name in orgIdMap.keys():
                if name in orgName:
                    orgId = orgIdMap[name]
                    if orgId == 144:  # bible-society-of-uganda
                        return 745  # the-bible-society-of-uganda
                    elif orgId == 72:  # bible-society-of-ghana
                        return 727  # the-bible-society-of-ghana
                    elif orgId == 127:  # bible-society-of-suriname
                        return 750  # bibles-international
                    elif orgId == 527:  # sim
                        return 20  # sim-serving-in-mission
                    #elif orgId == 4:  # biblica
                    #    return 783  # the-bible-league
                    else:
                        return orgId
        return None

    def addTextCopyright(self, copyright):
        if copyright != None:
            # strip punctuation/digits, then collapse runs of spaces left behind
            self.textCopyrightSet.add(
                re.sub(self.stripChars, "", copyright).strip().replace("  ", " ").replace("  ", " "))

    def addAudioCopyright(self, copyright):
        if copyright != None:
            self.audioCopyrightSet.add(
                re.sub(self.stripChars, "", copyright).strip().replace("  ", " ").replace("  ", " "))

    def addVideoCopyright(self, copyright):
        if copyright != None:
            self.videoCopyrightSet.add(
                re.sub(self.stripChars, "", copyright).strip().replace("  ", " ").replace("  ", " "))

    def addLicensor(self, licensor):
        if licensor != None:
            self.licensorSet.add(
                re.sub(self.stripChars, "", licensor).strip().replace("  ", " ").replace("  ", " "))

    def addCoLicensor(self, licensor):
        if licensor != None:
            self.coLicensorSet.add(
                re.sub(self.stripChars, "", licensor).strip().replace("  ", " ").replace("  ", " "))