def check_player_against_rankings(player, rankings):
    """Return the ranked player name that best matches ``player``.

    Args:
        player: The name to look up.
        rankings: Path to a CSV file with a ``playername`` column.

    Returns:
        The stored ``playername`` of the best match. A row scoring exactly
        1.0 is returned immediately; otherwise the highest-scoring row above
        0.61 wins (the first such row on ties). ``None`` when nothing
        qualifies.
    """
    # Drop empty and single-character tokens: a lone character would
    # substring-match nearly every stored name and trigger a full comparison
    # for every row. This is independent of the row, so compute it once.
    tokens = [part for part in player.lower().split(" ") if len(part) > 1]

    matches = []
    # The context manager guarantees the file is closed, including on the
    # early return for a perfect match.
    with open(rankings, newline="") as rankings_file:
        for row in csv.DictReader(rankings_file):
            stored_name = row['playername']
            stored_name_lower = stored_name.lower()

            # Cheap pre-filter: only score rows that share a token with the
            # searched name.
            if not any(token in stored_name_lower for token in tokens):
                continue

            match_score = name_tools.match(player, stored_name)
            # A perfect match cannot be beaten, so stop searching.
            if match_score == 1.0:
                return stored_name
            if match_score > 0.61:
                matches.append((match_score, stored_name))

    if not matches:
        return None

    # max() returns the first maximal element, which is the same row a stable
    # descending sort would put first.
    return max(matches, key=lambda tup: tup[0])[1]
def get(self, name):
    """Score ``name`` against every node id in the JSON database.

    Prints ``name``, loads the JSON document at ``config.database`` and
    compares ``name`` with the ``id`` of each entry in its ``nodes`` list
    using ``name_tools.match``.

    Returns:
        Ten times the highest match score found, where the score is never
        taken to be lower than 0.1.
    """
    print(name)
    with open(config.database) as json_file:
        nodes = json.load(json_file)['nodes']
        scores = [name_tools.match(node['id'], name) for node in nodes]
    # Seeding the candidates with 0.1 keeps the floor: a score only replaces
    # it when strictly greater.
    best = max([0.1] + scores)
    return best * 10
def check_player_by_nickname(player, nicknames):
    """Resolve ``player`` to a real player name through a nickname list.

    Args:
        player: The (possible) nickname to look up.
        nicknames: Path to a CSV file with ``nickname`` and ``playername``
            columns.

    Returns:
        The ``playername`` of the first row whose nickname matches ``player``
        with a score of at least 0.95, or ``None`` when no row matches.
    """
    # The context manager closes the file on both the early return and the
    # fall-through path.
    with open(nicknames, newline="") as nicknames_file:
        for row in csv.DictReader(nicknames_file):
            if name_tools.match(player, row['nickname']) >= 0.95:
                return row['playername']
    # Every row was examined without finding a close enough nickname.
    return None
def first_pass(self):
    """Compare ``self.value1`` with ``self.value2`` and log the evidence.

    The checks run in order and stop at the first conclusive one:

    1. If either value is missing, nothing is compared (a note is logged when
       ``self.listcount - 1`` is below 2).
    2. An explicit sameas link (``self.sameas``) gives confidence 1.0.
    3. Otherwise the names from ``self.checkname`` are compared pairwise with
       ``name_tools.match`` and the best score becomes the confidence.
    4. Dates from ``self.checkdates`` (falling back to ``self.secondpass``
       when one side has none) are then compared; a date match sets the
       confidence to 1.0, a mismatch to 0.

    Side effects:
        Appends human-readable findings to ``self.match`` and may set
        ``self.overallconfidence``, ``self.dates1`` and ``self.dates2``.

    Returns:
        ``self.match``, the accumulated list of message strings.
    """

    def exact_message(first, second):
        # Message logged when two date values agree.
        return ('match-dates--confidence=1.0--There is an exact match with birth and death dates ('
                + str(first) + ' and ' + str(second)
                + '). These are very likely the same person.--')

    def mismatch_message(first, second):
        # Message logged when two date values disagree.
        return ('match-dates--confidence=0--The dates do not match ('
                + str(first) + ' and ' + str(second)
                + '). These are not likely to be the same person.--')

    # Without both values there is nothing to compare.
    if not (self.value1 and self.value2):
        count = self.listcount - 1
        if count < 2:
            self.match.append('match--insufficient items in the list count=' + str(count))
        return self.match

    self.match.append('match= ' + self.value1 + ' and ' + self.value2 + ' --')

    # first check for an explicit same as
    is_same = self.sameas(self.value1, self.value2)

    # "label (identifier)" for each side, as used throughout the messages.
    person1 = self.value1label + ' (' + self.value1 + ')'
    person2 = self.value2label + ' (' + self.value2 + ')'

    if is_same:
        # The second identifier shown is value2 (it used to repeat value1).
        self.match.append('match-sameas--confidence=' + str(1.0)
                          + '--There is an explicit sameas (or equivalent) relationship between '
                          + person1 + ' and ' + person2 + '. These are the same person.--')
        self.overallconfidence = 1.0
        return self.match

    # if no explicit sameas is in place, check the names
    self.match.append('match-sameas--There is no explicit sameas relationship between '
                      + person1 + ' and ' + person2 + '. Invstigate further ... --')
    names1 = self.checkname(self.value1, 1)
    names2 = self.checkname(self.value2, 2)

    # if we don't have a name for one or both there's not much more we can do
    if not (names1 and names2):
        if not names1 and not names2:
            self.match.append('match-name--There are no names specified--')
        elif not names1:
            self.match.append('match-name--There is no name for ' + person1 + '--')
        else:
            self.match.append('match-name--There is no name for ' + person2 + '--')
        return self.match

    # get a confidence score for every pair of names using name_tools
    scores = [
        name_tools.match(name_tools.canonicalize(nm), name_tools.canonicalize(n))
        for nm in names1
        for n in names2
    ]
    # Best pairwise score; the leading 0 is the floor when nothing scores
    # above zero.
    confidence = max([0] + scores)

    if confidence >= 0.75:
        self.match.append('match-name--confidence=' + str(confidence)
                          + '--The names are alike. Checking for dates ... --')
    elif confidence > 0.5:
        self.match.append('match-name--confidence=' + str(confidence)
                          + '--The names have some similarities, they could be the same person. Checking dates to be sure ... --')
    else:
        # Includes exactly 0.5, which previously produced no message at all.
        self.match.append('match-name--confidence=' + str(confidence)
                          + '--The names are different, indicating that these are not likely to be the same person. Checking dates to be sure ... --')
    # Record the best score (previously the last pairwise score was stored).
    self.overallconfidence = confidence

    # check the dates
    self.dates1 = self.checkdates(self.value1)
    self.dates2 = self.checkdates(self.value2)

    # if there are no dates for either check for sameas links to other sources
    if not self.dates1 and not self.dates2:
        self.match.append('match-dates--Neither of the names have associated dates. Checking for sameas links to other sources ... )' + '--')

        # run a second pass on value 1
        dates = self.secondpass(self.value1)
        if dates:
            for d in dates:
                # NOTE(review): dates1 ends up holding only the last date
                # found, so the membership tests below run against that
                # single value -- confirm this is intended.
                self.dates1 = d
                self.match.append('match-dates--These dates were found: ' + d
                                  + ' but there is nothing to compare them to--')
        else:
            self.match.append('match-dates--No dates were found for ' + person1 + '--')

        # run a second pass on value 2
        dates = self.secondpass(self.value2)
        if dates:
            for d in dates:
                self.dates2 = d
                if self.dates1 and d in self.dates1:
                    self.match.append(exact_message(d, self.dates1))
                elif self.dates1:
                    self.match.append(mismatch_message(d, self.dates1))
                else:
                    self.match.append('match-dates--These dates were found: ' + d
                                      + ' but there is nothing to compare them to--')
        else:
            self.match.append('match-dates--No dates were found for ' + person2 + '--')
        return self.match

    # if there is one date, look for sameas links for the other
    if not self.dates1:
        self.match.append('match-dates--There are no birth or death dates for ' + person1
                          + '. Checking for sameas links to other sources ... --')
        dates = self.secondpass(self.value1)
        if not dates:
            self.match.append('match-dates--confidence=0--We only have one date ('
                              + str(self.dates2) + ') and nothing to compare it to.--')
            self.overallconfidence = 0
            return self.match
        # we could have more than one, so try every candidate before
        # declaring a mismatch
        for d in dates:
            if d in self.dates2:
                self.match.append(exact_message(d, self.dates2))
                self.overallconfidence = 1.0
                return self.match
        # No candidate matched; d is the last candidate examined.
        self.match.append(mismatch_message(d, self.dates2))
        self.overallconfidence = 0
        return self.match

    # if there is one date, look for sameas links for the other
    if not self.dates2:
        self.match.append('match-dates--There are no birth or death dates for ' + person2
                          + '. Checking for sameas links to other sources ... --')
        dates = self.secondpass(self.value2)
        if not dates:
            self.match.append('match-dates--confidence=0--We only have one date ('
                              + str(self.dates1) + ') and nothing to compare it to.--')
            # Keep the stored confidence in line with the logged
            # "confidence=0", as the mirror branch above does.
            self.overallconfidence = 0
            return self.match
        for d in dates:
            if d in self.dates1:
                self.match.append(exact_message(self.dates1, d))
                self.overallconfidence = 1.0
                return self.match
        # No candidate matched; d is the last candidate examined.
        self.match.append(mismatch_message(self.dates1, d))
        self.overallconfidence = 0
        return self.match

    # otherwise we have two dates, test them: any equal pair is a match
    if any(dt == d for dt in self.dates1 for d in self.dates2):
        self.match.append(exact_message(self.dates1, self.dates2))
        self.overallconfidence = 1.0
    else:
        self.match.append(mismatch_message(self.dates1, self.dates2))
        self.overallconfidence = 0
    return self.match