def matchGamename(self, results, gamenameFromFile, digits, romes, checkSubtitle, scraperSource, romCollection):
    """Pick the entry of ``results`` that best matches ``gamenameFromFile``.

    Each result is compared against the game name in several steps. The
    first step that ``self.compareNames`` accepts ends the search at once
    with a ratio of 1.0:

    1. the names as given,
    2. both names passed through ``self.normalizeName``,
    3. both names with every ``digits[j]`` replaced by ``romes[j]``,
    4. like 3, with the last character of the game name dropped when
       ``gamenameFromFile`` ends with ' 1' or ' I',
    5. like 3, with the last character of the search key dropped when the
       original search key ends with ' 1' or ' I'.

    If no step matches, the ``difflib`` similarity ratio of the upper-cased
    names from step 3 is recorded and the result with the highest ratio wins.

    Args:
        results: sequence of parse results; read through
            ``self.resolveParseResult`` with the keys 'PlatformSearchKey'
            and 'SearchKey'.
        gamenameFromFile: game name to look for.
        digits: substrings to replace (presumably arabic sequel numbers --
            confirm against caller).
        romes: replacements, paired by position with ``digits``
            (presumably roman numerals -- confirm against caller).
        checkSubtitle: passed through to ``self.compareNames``.
        scraperSource: passed to ``config.getPlatformByRomCollection`` to
            determine the expected platform.
        romCollection: object whose ``name`` is used for the platform lookup.

    Returns:
        Tuple ``(result, ratio)``. ``ratio`` is 1.0 for a perfect match (or
        when a result has an empty search key), otherwise the highest
        similarity ratio found. ``(None, 0.0)`` if ``results`` is empty or
        None.
    """
    if not results:
        return None, 0.0

    highestRatio = 0.0
    bestIndex = 0

    for i, result in enumerate(results):
        # Best effort per result: a failure is logged and the loop moves on
        # to the next result instead of aborting the whole match.
        try:
            # check if the result has the correct platform (if needed)
            platformSearchKey = self.resolveParseResult(result, 'PlatformSearchKey')
            if platformSearchKey != '':
                platform = config.getPlatformByRomCollection(scraperSource, romCollection.name)
                if platform != platformSearchKey:
                    Logutil.log('Platform mismatch. %s != %s. Result will be skipped.' % (platform, platformSearchKey), util.LOG_LEVEL_INFO)
                    continue

            searchKey = self.resolveParseResult(result, 'SearchKey')
            # keep it for later reference
            origSearchKey = searchKey
            gamenameToCheck = gamenameFromFile

            # searchKey is specified in parserConfig - if no one is specified
            # the current result is valid (1 file per game scenario)
            if searchKey == '':
                Logutil.log('No searchKey found. Using first result', util.LOG_LEVEL_INFO)
                return result, 1.0

            Logutil.log('Comparing %s with %s' % (gamenameToCheck, searchKey), util.LOG_LEVEL_INFO)
            if self.compareNames(gamenameToCheck, searchKey, checkSubtitle):
                # perfect match
                return result, 1.0

            # try again with normalized names
            gamenameToCheck = self.normalizeName(gamenameToCheck)
            searchKey = self.normalizeName(searchKey)
            Logutil.log('Try normalized names. Comparing %s with %s' % (gamenameToCheck, searchKey), util.LOG_LEVEL_INFO)
            if self.compareNames(gamenameToCheck, searchKey, checkSubtitle):
                # perfect match
                return result, 1.0

            # try again with replaced sequel numbers
            sequelGamename = gamenameToCheck
            sequelSearchKey = searchKey
            for digit, rome in zip(digits, romes):
                sequelGamename = sequelGamename.replace(digit, rome)
                sequelSearchKey = sequelSearchKey.replace(digit, rome)

            Logutil.log('Try with replaced sequel numbers. Comparing %s with %s' % (sequelGamename, sequelSearchKey), util.LOG_LEVEL_INFO)
            if self.compareNames(sequelGamename, sequelSearchKey, checkSubtitle):
                # perfect match
                return result, 1.0

            # remove last char for sequel number 1 from gamename
            if gamenameFromFile.endswith((' 1', ' I')):
                gamenameRemovedSequel = sequelGamename[:-1]
                Logutil.log('Try with removed sequel numbers. Comparing %s with %s' % (gamenameRemovedSequel, sequelSearchKey), util.LOG_LEVEL_INFO)
                if self.compareNames(gamenameRemovedSequel, sequelSearchKey, checkSubtitle):
                    # perfect match
                    return result, 1.0

            # remove last char for sequel number 1 from result
            # (the check uses the original search key because the ' ' is needed again)
            if origSearchKey.endswith((' 1', ' I')):
                searchKeyRemovedSequels = sequelSearchKey[:-1]
                Logutil.log('Try with removed sequel numbers. Comparing %s with %s' % (sequelGamename, searchKeyRemovedSequels), util.LOG_LEVEL_INFO)
                if self.compareNames(sequelGamename, searchKeyRemovedSequels, checkSubtitle):
                    # perfect match
                    return result, 1.0

            # no exact match: remember how similar this candidate is
            ratio = difflib.SequenceMatcher(None, sequelGamename.upper(), sequelSearchKey.upper()).ratio()
            Logutil.log('No result found. Try to find game by ratio. Comparing %s with %s, ratio: %s' % (sequelGamename, sequelSearchKey, str(ratio)), util.LOG_LEVEL_INFO)

            if ratio > highestRatio:
                highestRatio = ratio
                bestIndex = i

        except Exception as exc:
            Logutil.log("An error occured while matching the best result: " + str(exc), util.LOG_LEVEL_WARNING)

    # No perfect match: hand back the most similar candidate together with
    # its ratio so the return shape matches the perfect-match exits above.
    return results[bestIndex], highestRatio
def scrapeResults(self, results, scraper, urlsFromPreviousScrapers, gamenameFromFile, foldername, filecrc, romFile, fuzzyFactor, updateOption, romCollection, settings):
    """Run one scraper for the current game and merge its values into ``results``.

    The source to parse is ``scraper.source``, with two special cases: a
    numeric source is a 1-based index into ``urlsFromPreviousScrapers``
    (optionally extended by ``scraper.sourceAppend``), and the source 'nfo'
    is resolved through ``self.getNfoFile``. If no best result is found for
    ``gamenameFromFile``, the lookup is repeated once with all ``(...)`` and
    ``[...]`` parts removed from the name.

    Side effects: stores ``expected_platform``, ``fuzzy_factor``,
    ``update_option``, ``scraper``, ``crc``, ``foldername`` and ``romfile``
    on ``self``; may append to ``urlsFromPreviousScrapers``; fills missing
    keys of ``results`` in place and HTML-unescapes the first element of the
    value lists it touches.

    Args:
        results: dict of already collected values (key -> list).
        scraper: scraper definition; ``parseInstruction``, ``source``,
            ``sourceAppend`` and ``returnUrl`` are read.
        urlsFromPreviousScrapers: list of urls returned by earlier scrapers.
        gamenameFromFile: game name used for parsing and matching.
        foldername, filecrc, romFile: stored on ``self`` for the current game.
        fuzzyFactor, updateOption: stored on ``self``.
        romCollection: object whose ``name`` is used for the platform lookup.
        settings: passed to ``self.getNfoFile``.

    Returns:
        Tuple ``(results, urlsFromPreviousScrapers, flag)``. ``flag`` is True
        on every early exit (configuration error, no match, or a scraper that
        only provides a url) and False after values were merged into
        ``results``. NOTE(review): the flag's meaning for the caller is not
        visible here -- confirm against the call site.
    """
    Logutil.log("using parser file: " + scraper.parseInstruction, util.LOG_LEVEL_DEBUG)
    Logutil.log("using game description: " + scraper.source, util.LOG_LEVEL_DEBUG)

    scraperSource = scraper.source.decode('utf-8')

    Logutil.log('Expected platform: {0}'.format(romCollection.name), util.LOG_LEVEL_DEBUG)
    self.expected_platform = config.getPlatformByRomCollection(scraperSource, romCollection.name)

    self.fuzzy_factor = fuzzyFactor
    self.update_option = updateOption
    self.scraper = scraper

    # Information about the current game
    self.crc = filecrc
    self.foldername = foldername
    self.romfile = romFile

    # url to scrape may be passed from the previous scraper
    if scraper.source.isdigit():
        if len(urlsFromPreviousScrapers) == 0:
            Logutil.log(
                "Configuration error: scraper source is numeric and there is no previous scraper that returned an url to scrape.",
                util.LOG_LEVEL_ERROR)
            return results, urlsFromPreviousScrapers, True

        try:
            urlIndex = int(scraper.source) - 1
            # A source of "0" would give index -1, which Python resolves to
            # the *last* url instead of failing - treat it as out of range.
            if urlIndex < 0:
                raise IndexError(urlIndex)
            url = urlsFromPreviousScrapers[urlIndex]
            Logutil.log("using url from previous scraper: " + str(url), util.LOG_LEVEL_INFO)
        except Exception:
            Logutil.log(
                "Configuration error: no url found at index " + str(scraper.source),
                util.LOG_LEVEL_ERROR)
            return results, urlsFromPreviousScrapers, True

        if scraper.sourceAppend is not None and scraper.sourceAppend != "":
            url = url + '/' + scraper.sourceAppend
            Logutil.log(
                "sourceAppend = '%s'. New url = '%s'" % (scraper.sourceAppend, url),
                util.LOG_LEVEL_INFO)

        scraperSource = url

    if scraper.source == 'nfo':
        scraperSource = self.getNfoFile(settings, romCollection, gamenameFromFile)

    tempResults = self.parseDescriptionFile(scraper, scraperSource, gamenameFromFile)

    # Logging the raw results is diagnostic only; it must never abort scraping.
    if tempResults is not None:
        try:
            Logutil.log(
                "Found {0} results for {1} from URL {2}".format(
                    len(tempResults), gamenameFromFile, scraperSource),
                util.LOG_LEVEL_DEBUG)
            self.log_results(tempResults)
        except Exception as exc:
            Logutil.log("Unable to log scrape results: " + str(exc), util.LOG_LEVEL_WARNING)

    tempResults = self.getBestResults(tempResults, gamenameFromFile)

    if tempResults is None:
        # try again without (*) and [*]
        altname = re.sub(r'\s\(.*\)|\s\[.*\]|\(.*\)|\[.*\]', '', gamenameFromFile)
        Logutil.log(
            "Did not find any matches for {0}, trying again with {1}".format(
                gamenameFromFile, altname),
            util.LOG_LEVEL_DEBUG)
        tempResults = self.parseDescriptionFile(scraper, scraperSource, altname)
        tempResults = self.getBestResults(tempResults, altname)

        if tempResults is None:
            Logutil.log("Still no matches after modifying game name", util.LOG_LEVEL_DEBUG)
            if scraper.returnUrl:
                # keep the url list aligned with the scraper order
                urlsFromPreviousScrapers.append('')
            return results, urlsFromPreviousScrapers, True

        Logutil.log(
            "After modifying game name, found {0} best results for {1}".format(
                len(tempResults), altname),
            util.LOG_LEVEL_DEBUG)

    if scraper.returnUrl:
        # This scraper only hands a url to the next one; nothing is merged.
        try:
            tempUrl = self.resolveParseResult(tempResults, 'url')
            urlsFromPreviousScrapers.append(tempUrl)
            Logutil.log("pass url to next scraper: " + str(tempUrl), util.LOG_LEVEL_INFO)
        except Exception:
            Logutil.log(
                "Should pass url to next scraper, but url is empty.",
                util.LOG_LEVEL_WARNING)
        return results, urlsFromPreviousScrapers, True

    # For each result, compare against already existing results. If the old
    # key value doesn't exist and the new one does, then use the new value,
    # otherwise retain the value
    if tempResults is not None:
        # one parser instance is enough for all keys
        unescape = HTMLParser.HTMLParser().unescape

        for resultKey in tempResults:
            # A value of None is treated like a missing (empty) list.
            resultValueOld = results.get(resultKey) or []
            resultValueNew = tempResults.get(resultKey) or []

            # unescaping ugly html encoding from websites; a None entry is
            # left alone because unescape() cannot handle it
            if len(resultValueOld) > 0 and resultValueOld[0] is not None:
                resultValueOld[0] = unescape(resultValueOld[0])
            if len(resultValueNew) > 0 and resultValueNew[0] is not None:
                resultValueNew[0] = unescape(resultValueNew[0])

            hasNewValue = len(resultValueNew) != 0 and resultValueNew != [None]
            if len(resultValueOld) == 0 and hasNewValue:
                Logutil.log(
                    "No existing value for key {0}, replacing with new value [{1}]".format(
                        resultKey, ','.join(str(x) for x in resultValueNew)),
                    util.LOG_LEVEL_DEBUG)
                results[resultKey] = resultValueNew
            else:
                Logutil.log(
                    "Retaining existing value for key {0} ([{1}])".format(
                        resultKey, ','.join(str(x) for x in resultValueOld)),
                    util.LOG_LEVEL_DEBUG)

    return results, urlsFromPreviousScrapers, False