def Search(results, media, lang, manual, movie):
  ''' Search the AniDB title database and append candidate matches to 'results'.

      Two passes are made over the module-level 'AniDBTitlesDB' XML tree:
        1. Full title: every <title> whose lower-cased text contains the lower-cased
           original title is scored (100 exact, 99 case-insensitive, otherwise the
           better of a longest-common-substring ratio and a Levenshtein ratio).
           If the best score is >= 90 the function returns immediately.
        2. Keyword: the cleansed title is split into words, noise words are dropped,
           and every <anime> having a title containing any remaining word is scored
           through 'WordsScore'. Entries scoring below 25 are only added when no
           entry reached 50.

      Args:
        results: container exposing 'Append()'; receives one MetadataSearchResult per candidate.
        media:   object providing 'title' (movies), 'show' (series) and 'year'.
        lang:    language value forwarded untouched to each MetadataSearchResult.
        manual:  not used by this function.
        movie:   truthy to search on 'media.title', falsy to search on 'media.show'.

      Returns:
        Tuple (best_score, n): the highest score of the pass that ended the search, and
        the number of times a title scored >= 100 for an aid other than the current best.
  '''
  Log.Info("=== AniDB.Search() ===".ljust(157, '='))
  ### Words which only add noise to a keyword search, lowercase only ###
  FILTER_SEARCH_WORDS = [
    'to', 'wa', 'ga', 'no', 'age', 'da', 'chou', 'super', 'yo', 'de', 'chan', 'hime', 'ni', 'sekai',                                                                         # Jp
    'a', 'of', 'an', 'the', 'motion', 'picture', 'special', 'oav', 'ova', 'tv', 'special', 'eternal', 'final', 'last', 'one', 'movie', 'me', 'princess', 'theater', 'and',  # En Continued
    'le', 'la', 'un', 'les', 'nos', 'vos', 'des', 'ses', 'world', 'in', 'another', 'this', 'story', 'life', 'name',                                                          # Fr
    'i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x', 'xi', 'xii', 'xiii', 'xiv', 'xv', 'xvi']                                                                    # Roman digits
  SPLIT_CHARS = [';', ':', '*', '?', ',', '.', '~', '-', '\\', '/']  # Space is implied, characters forbidden by os filename limitations
  orig_title          = media.title if movie else media.show
  orig_title_cleansed = common.cleanse_title(orig_title)
  Log.Info("orig_title: '{}', orig_title_cleansed: '{}'".format(orig_title, orig_title_cleansed))

  ### Full title search ###
  Log.Info("--- full title ---".ljust(157, '-'))
  best_aid, best_score, best_title, n = "", 0, "", 0
  start_time = time.time()
  Log.Info('len AniDBTitlesDB: {}'.format(len(AniDBTitlesDB)))
  # Single quotes are replaced by spaces so the title cannot terminate the XPath string literal
  for title_element in AniDBTitlesDB.xpath(u"/animetitles/anime/title[text()[contains(lower-case(.), '%s')]]" % orig_title.lower().replace("'", " ")):
    aid   = title_element.getparent().get('aid', '')
    title = title_element.text
    if aid == best_aid and best_score >= 100:
      continue  # This anime already holds a perfect score, its other titles add nothing
    if orig_title == title:
      title_cleansed, score = title, 100
    elif orig_title.lower() == title.lower():
      title_cleansed, score = title.lower(), 99
    else:  # Original title is only contained in this title
      title_cleansed = common.cleanse_title(title)
      longest        = max(len(title_cleansed), len(orig_title_cleansed))
      if longest:
        # 'n' is subtracted so partial matches rank below the perfect matches already found
        score1 = 100 * len(String.LongestCommonSubstring(orig_title_cleansed, title_cleansed)) / longest - n
        score2 = 100 - 100 * Util.LevenshteinDistance(orig_title_cleansed, title_cleansed) / longest - n
      else:
        score1, score2 = 0, 0
      score = max(score1, score2)
    if score >= 100 and not aid == best_aid:
      n += 1
    results.Append(MetadataSearchResult(id="%s-%s" % ("anidb", aid), name="%s [%s-%s]" % (title, "anidb", aid), year=media.year, lang=lang, score=score))
    Log.Info("[+] score: {:>3}, aid: {:>5}, title: '{}', title_cleansed: {}".format(score, aid, title, title_cleansed))
    if score > best_score:
      best_score, best_title, best_aid = score, title, aid
  if best_score:
    Log.Info("[=] best_score: {:>3}, best_aid: {:>5}, best_title: {}".format(best_score, best_aid, best_title))
  Log.Info("elapsed_time: {:.3f}".format(time.time() - start_time))
  if best_score >= 90:
    return best_score, n

  ### Keyword match using Xpath ###
  Log.Info("--- keyword ---".ljust(157, '-'))
  words, words_skipped = [], []
  for char in SPLIT_CHARS:
    orig_title_cleansed = orig_title_cleansed.replace(char, " ")
  orig_title_cleansed = orig_title_cleansed.replace("'", '')
  for word in orig_title_cleansed.split():
    (words_skipped if word in FILTER_SEARCH_WORDS or len(word) <= 3 else words).append(word)
  if not words:
    # Every word was filtered out (e.g. show 'No 6'): fall back to searching on all of them
    words, words_skipped = orig_title_cleansed.split(), []
  Log.Info("Keyword Search - Words: {}, skipped: {}".format(str(words), str(words_skipped)))
  best_score = 0
  if not words:
    # Nothing left to search on; an empty predicate would make the XPath expression invalid
    Log.Info("Keyword Search - no usable word, skipping")
    return best_score, n

  type_order = ('main', 'official', 'syn', 'short', '')
  # A title type missing from 'type_order' ranks last instead of raising ValueError
  type_rank  = lambda title_type: type_order.index(title_type) if title_type in type_order else len(type_order)
  # Same predicate selects the <anime> nodes and then the matching <title> nodes inside each
  predicate   = " or ".join(["contains(lower-case(text()), '{}')".format(x.lower()) for x in words])
  last_chance = []
  start_time  = time.time()
  for anime in AniDBTitlesDB.xpath(u"/animetitles/anime[title[{}]]".format(predicate)):
    aid = anime.get('aid', '')
    best_score_entry, best_title_entry, best_type_entry, best_lang_entry = 0, "", "", ""
    for title_element in anime.xpath(u"title[%s]" % predicate):
      title          = title_element.text
      Type           = title_element.get('type', '')
      Lang           = title_element.get('{http://www.w3.org/XML/1998/namespace}lang', '')
      title_cleansed = common.cleanse_title(title)
      if title_cleansed == orig_title_cleansed:
        score = 98 if ';' not in title else 100
      else:
        score = WordsScore(orig_title_cleansed.split(), title_cleansed)  # Movies can have same title
      # On equal score prefer the title whose type comes first in 'type_order'
      if score > best_score_entry or score == best_score_entry and (not best_type_entry or type_rank(Type) < type_rank(best_type_entry)):
        best_score_entry, best_title_entry, best_type_entry, best_lang_entry = score, title, Type, Lang
    if best_score_entry < 25:
      # Too weak for now; kept aside in case nothing better turns up
      last_chance.append((best_score_entry, best_title_entry, best_type_entry, best_lang_entry, aid))
      continue
    Log.Info('[-] score: {:>3}, aid: {:>5}, title: "{}"'.format(best_score_entry, aid, best_title_entry))
    results.Append(MetadataSearchResult(id="%s-%s" % ("anidb", aid), name="{title} [{Type}({Lang})] [anidb-{aid}]".format(title=best_title_entry, aid=aid, Type=best_type_entry, Lang=best_lang_entry), year=media.year, lang=lang, score=best_score_entry))
    if best_score_entry > best_score:
      best_score = best_score_entry
  if best_score < 50:  # Add back entries lower than 25 if nothing scored above 50
    for best_score_entry, best_title_entry, best_type_entry, best_lang_entry, aid in last_chance:
      # Argument order follows the format string: score, aid, title
      Log.Info('[-] score: {:>3}, aid: {:>5}, title: "{}"'.format(best_score_entry, aid, best_title_entry))
      results.Append(MetadataSearchResult(id="%s-%s" % ("anidb", aid), name="{title} [{Type}({Lang}): {aid}]".format(title=best_title_entry, aid=aid, Type=best_type_entry, Lang=best_lang_entry), year=media.year, lang=lang, score=best_score_entry))
      if best_score_entry > best_score:
        best_score = best_score_entry
  Log.Info("elapsed_time: {:.3f}".format(time.time() - start_time))
  return best_score, n
def Search(results, media, lang, manual, movie):
  ''' Search the AniDB title database and append candidate matches to 'results'.

      Two passes are made over the module-level 'AniDBTitlesDB' XML tree:
        1. Full title: every <title> whose lower-cased text contains the lower-cased
           original title is scored (100 exact, 99 case-insensitive, otherwise the
           better of a longest-common-substring ratio and a Levenshtein ratio).
           If the best score is >= 90 the function returns immediately.
        2. Keyword: the cleansed title is split into words, noise words are dropped,
           and every <anime> having a title containing any remaining word is scored
           through 'WordsScore'. Entries scoring below 25 are only added when no
           entry reached 50.

      Args:
        results: container exposing 'Append()'; receives one MetadataSearchResult per candidate.
        media:   object providing 'title' (movies), 'show' (series) and 'year'.
        lang:    language value forwarded untouched to each MetadataSearchResult.
        manual:  not used by this function.
        movie:   truthy to search on 'media.title', falsy to search on 'media.show'.

      Returns:
        Tuple (best_score, n): the highest score of the pass that ended the search, and
        the number of times a title scored >= 100 for an aid other than the current best.
  '''
  Log.Info("=== AniDB.Search() ===".ljust(157, '='))
  ### Words which only add noise to a keyword search, lowercase only ###
  FILTER_SEARCH_WORDS = [
    'to', 'wa', 'ga', 'no', 'age', 'da', 'chou', 'super', 'yo', 'de', 'chan', 'hime', 'ni', 'sekai',                                                                         # Jp
    'a', 'of', 'an', 'the', 'motion', 'picture', 'special', 'oav', 'ova', 'tv', 'special', 'eternal', 'final', 'last', 'one', 'movie', 'me', 'princess', 'theater', 'and',  # En Continued
    'le', 'la', 'un', 'les', 'nos', 'vos', 'des', 'ses', 'world', 'in', 'another', 'this', 'story', 'life', 'name',                                                          # Fr
    'i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x', 'xi', 'xii', 'xiii', 'xiv', 'xv', 'xvi']                                                                    # Roman digits
  SPLIT_CHARS = [';', ':', '*', '?', ',', '.', '~', '-', '\\', '/']  # Space is implied, characters forbidden by os filename limitations
  orig_title          = media.title if movie else media.show
  orig_title_cleansed = common.cleanse_title(orig_title)
  Log.Info("orig_title: '{}', orig_title_cleansed: '{}'".format(orig_title, orig_title_cleansed))

  ### Full title search ###
  Log.Info("--- full title ---".ljust(157, '-'))
  best_aid, best_score, best_title, n = "", 0, "", 0
  start_time = time.time()
  Log.Info('len AniDBTitlesDB: {}'.format(len(AniDBTitlesDB)))
  # Single quotes are replaced by spaces so the title cannot terminate the XPath string literal
  for title_element in AniDBTitlesDB.xpath(u"/animetitles/anime/title[text()[contains(lower-case(.), '%s')]]" % orig_title.lower().replace("'", " ")):
    aid   = title_element.getparent().get('aid', '')
    title = title_element.text
    if aid == best_aid and best_score >= 100:
      continue  # This anime already holds a perfect score, its other titles add nothing
    if orig_title == title:
      title_cleansed, score = title, 100
    elif orig_title.lower() == title.lower():
      title_cleansed, score = title.lower(), 99
    else:  # Original title is only contained in this title
      title_cleansed = common.cleanse_title(title)
      longest        = max(len(title_cleansed), len(orig_title_cleansed))
      if longest:
        # 'n' is subtracted so partial matches rank below the perfect matches already found
        score1 = 100 * len(String.LongestCommonSubstring(orig_title_cleansed, title_cleansed)) / longest - n
        score2 = 100 - 100 * Util.LevenshteinDistance(orig_title_cleansed, title_cleansed) / longest - n
      else:
        score1, score2 = 0, 0
      score = max(score1, score2)
    if score >= 100 and not aid == best_aid:
      n += 1
    results.Append(MetadataSearchResult(id="%s-%s" % ("anidb", aid), name="%s [%s-%s]" % (title, "anidb", aid), year=media.year, lang=lang, score=score))
    Log.Info("[+] score: {:>3}, aid: {:>5}, title: '{}', title_cleansed: {}".format(score, aid, title, title_cleansed))
    if score > best_score:
      best_score, best_title, best_aid = score, title, aid
  if best_score:
    Log.Info("[=] best_score: {:>3}, best_aid: {:>5}, best_title: {}".format(best_score, best_aid, best_title))
  Log.Info("elapsed_time: {:.3f}".format(time.time() - start_time))
  if best_score >= 90:
    return best_score, n

  ### Keyword match using Xpath ###
  Log.Info("--- keyword ---".ljust(157, '-'))
  words, words_skipped = [], []
  for char in SPLIT_CHARS:
    orig_title_cleansed = orig_title_cleansed.replace(char, " ")
  orig_title_cleansed = orig_title_cleansed.replace("'", '')
  for word in orig_title_cleansed.split():
    (words_skipped if word in FILTER_SEARCH_WORDS or len(word) <= 3 else words).append(word)
  if not words:
    # Every word was filtered out (e.g. show 'No 6'): fall back to searching on all of them
    words, words_skipped = orig_title_cleansed.split(), []
  Log.Info("Keyword Search - Words: {}, skipped: {}".format(str(words), str(words_skipped)))
  best_score = 0
  if not words:
    # Nothing left to search on; an empty predicate would make the XPath expression invalid
    Log.Info("Keyword Search - no usable word, skipping")
    return best_score, n

  type_order = ('main', 'official', 'syn', 'short', '')
  # A title type missing from 'type_order' ranks last instead of raising ValueError
  type_rank  = lambda title_type: type_order.index(title_type) if title_type in type_order else len(type_order)
  # Same predicate selects the <anime> nodes and then the matching <title> nodes inside each
  predicate   = " or ".join(["contains(lower-case(text()), '{}')".format(x.lower()) for x in words])
  last_chance = []
  start_time  = time.time()
  for anime in AniDBTitlesDB.xpath(u"/animetitles/anime[title[{}]]".format(predicate)):
    aid = anime.get('aid', '')
    best_score_entry, best_title_entry, best_type_entry, best_lang_entry = 0, "", "", ""
    for title_element in anime.xpath(u"title[%s]" % predicate):
      title          = title_element.text
      Type           = title_element.get('type', '')
      Lang           = title_element.get('{http://www.w3.org/XML/1998/namespace}lang', '')
      title_cleansed = common.cleanse_title(title)
      if title_cleansed == orig_title_cleansed:
        score = 98 if ';' not in title else 100
      else:
        score = WordsScore(orig_title_cleansed.split(), title_cleansed)  # Movies can have same title
      # On equal score prefer the title whose type comes first in 'type_order'
      if score > best_score_entry or score == best_score_entry and (not best_type_entry or type_rank(Type) < type_rank(best_type_entry)):
        best_score_entry, best_title_entry, best_type_entry, best_lang_entry = score, title, Type, Lang
    if best_score_entry < 25:
      # Too weak for now; kept aside in case nothing better turns up
      last_chance.append((best_score_entry, best_title_entry, best_type_entry, best_lang_entry, aid))
      continue
    Log.Info('[-] score: {:>3}, aid: {:>5}, title: "{}"'.format(best_score_entry, aid, best_title_entry))
    results.Append(MetadataSearchResult(id="%s-%s" % ("anidb", aid), name="{title} [{Type}({Lang})] [anidb-{aid}]".format(title=best_title_entry, aid=aid, Type=best_type_entry, Lang=best_lang_entry), year=media.year, lang=lang, score=best_score_entry))
    if best_score_entry > best_score:
      best_score = best_score_entry
  if best_score < 50:  # Add back entries lower than 25 if nothing scored above 50
    for best_score_entry, best_title_entry, best_type_entry, best_lang_entry, aid in last_chance:
      # Argument order follows the format string: score, aid, title
      Log.Info('[-] score: {:>3}, aid: {:>5}, title: "{}"'.format(best_score_entry, aid, best_title_entry))
      results.Append(MetadataSearchResult(id="%s-%s" % ("anidb", aid), name="{title} [{Type}({Lang}): {aid}]".format(title=best_title_entry, aid=aid, Type=best_type_entry, Lang=best_lang_entry), year=media.year, lang=lang, score=best_score_entry))
      if best_score_entry > best_score:
        best_score = best_score_entry
  Log.Info("elapsed_time: {:.3f}".format(time.time() - start_time))
  return best_score, n
AniDBTitlesDB = None ANIDB_API_DOMAIN = 'api.anidb.net:9001' ANIDB_HTTP_API_URL = ANIDB_PROTOCOL + ANIDB_API_DOMAIN + '/httpapi?request=anime&client=hama&clientver=1&protover=1&aid=' ANIDB_IMAGE_DOMAIN = 'img7.anidb.net' ANIDB_PIC_BASE_URL = ANIDB_PROTOCOL + ANIDB_IMAGE_DOMAIN + '/pics/anime/' # AniDB picture directory ANIDB_PIC_THUMB_URL = ANIDB_PROTOCOL + ANIDB_IMAGE_DOMAIN + '/pics/anime/thumbs/150/{name}.jpg-thumb.jpg' AniDBBan = False ### Functions ### # Define custom functions to be available in 'xpath' calls ns = etree.FunctionNamespace(None) ns['lower-case'] = lambda context, s: s[0].lower() ns['clean-title'] = lambda context, s: common.cleanse_title(s) def Search(results, media, lang, manual, movie): ''' AniDB Search assign an anidbid to a series or movie ''' Log.Info("=== AniDB.Search() ===".ljust(157, '=')) FILTER_SEARCH_WORDS = [ ### These are words which cause extra noise due to being uninteresting for doing searches on, Lowercase only #################################### 'to', 'wa', 'ga', 'no', 'age', 'da', 'chou', 'super',
### AniBD ### #http://api.anidb.net:9001/httpapi?request=anime&client=hama&clientver=1&protover=1&aid=6481 ### Imports ### import common # Functions: SaveFile, LoadFile, metadata_download, WriteLogs, cleanse_title, getImagesFromASS import os # Functions: import re # Functions: re.search, re.match, re.sub, re.IGNORECASE import string # Functions: import datetime # Functions: import time # Functions: import AnimeLists from common import GetXml, Dict, SaveDict, natural_sort_key, Log, DictString from lxml import etree ns = etree.FunctionNamespace(None) ns['lower-case' ] = lambda context, s: s[0].lower() ns['clean-title'] = lambda context, s: common.cleanse_title(s) ### Variables ### ### Always on variables ### AniDBTitlesDB = None ### Functions ### def Search(results, media, lang, manual, movie): ''' AniDB Search assign an anidbid to a series or movie ''' Log.Info("=== AniDB.Search() ===".ljust(157, '=')) FILTER_SEARCH_WORDS = [ ### These are words which cause extra noise due to being uninteresting for doing searches on, Lowercase only #################################### 'to', 'wa', 'ga', 'no', 'age', 'da', 'chou', 'super', 'yo', 'de', 'chan', 'hime', 'ni', 'sekai', # Jp 'a', 'of', 'an', 'the', 'motion', 'picture', 'special', 'oav', 'ova', 'tv', 'special', 'eternal', 'final', 'last', 'one', 'movie', 'me', 'princess', 'theater', 'and', # En Continued 'le', 'la', 'un', 'les', 'nos', 'vos', 'des', 'ses', 'world', 'in', 'another', 'this', 'story', 'life', 'name', # Fr