def get(self, parsed_args):
    """
    Return every entity and resource matching the requested query, best match first.

    An empty (falsy) query short-circuits to an empty result list. Otherwise the
    ``conduct_*_search`` helpers append their hits to a shared list, each hit's
    ``score`` is scaled by how closely its name matches the query, and the hits
    are returned under the ``"results"`` key sorted by descending score.
    """
    query = parsed_args["query"]
    if not query:
        return {"results": []}

    results = []
    username = None

    # NOTE(review): the team and robot searches are assumed to belong to the
    # authenticated branch; confirm against the original indentation.
    if get_authenticated_user():
        username = get_authenticated_user().username

        # Teams: both helpers share one set of already-encountered teams.
        seen_teams = set()
        conduct_team_search(username, query, seen_teams, results)
        conduct_admined_team_search(username, query, seen_teams, results)

        # Robot accounts.
        conduct_robot_search(username, query, results)

    # Repositories.
    conduct_repo_search(username, query, results)

    # Users and organizations.
    conduct_namespace_search(username, query, results)

    # Weight each hit by the closeness of the query to its name. A liquidmetal
    # score of zero is replaced by 0.5, so such hits are halved, not zeroed.
    for entry in results:
        label = entry.get("short_name", entry["name"])
        closeness = liquidmetal.score(label, query) or 0.5
        entry["score"] = entry["score"] * closeness

    ranked = sorted(results, key=itemgetter("score"), reverse=True)
    return {"results": ranked}
def _match_torrent_name(movie_title, movie_year, torrent_title): ''' Checks if movie_title and torrent_title are a good match movie_title (str): title of movie movie_year (str/int): year of movie release torrent_title (str): title of torrent Helper function for rss_sync. Since torrent indexers don't supply imdbid like NewzNab does we have to compare the titles to find a match. This should be fairly accurate since a backlog search uses name and year to find releases. Checks if the year is in the title, promptly ignores it if the year is not found. Then does a fuzzy title match looking for 70+ token set ratio. Fuzzy match is done with movie title vs torrent name split on the year. This removes release information and matches just on the movie title in the torrent title. Returns bool on match success ''' if movie_year not in torrent_title: return False else: movie = movie_title.replace(':', '.').replace(' ', '.').lower() torrent = torrent_title.replace(' ', '.').replace( ':', '.').split(movie_year)[0].lower() match = lm.score(torrent, movie) * 100 if match > 70: return True else: return False
def check_phrase_similarity(self):
    """Look for a keyword phrase that is similar enough to the user's input.

    Replaces ``self.user_input`` with the TextBlob ``tags`` of its lower-cased
    text and stores the number of tags in ``self.input_len``. For every phrase
    of every type in ``PHRASE_TYPES`` a score is accumulated from the
    liquidmetal score of each phrase entry against each tagged word, divided
    by the number of tagged words.

    Returns True, after setting ``self.response`` to a random response of the
    matching phrase type, for the first phrase whose score reaches 0.7;
    returns False when no phrase does.
    """
    lowered = self.user_input.lower()
    self.user_input = TextBlob(lowered).tags
    self.input_len = len(self.user_input)

    for phrase_type in PHRASE_TYPES:
        for phrase in getattr(keywords, phrase_type):
            score = 0.0
            for word in self.user_input:
                for keyword in phrase:
                    # NOTE(review): only the phrase entry is tested against
                    # unimportant_words; `word` is merely checked for
                    # truthiness. Confirm this is the intended condition.
                    if not word or keyword in unimportant_words:
                        continue
                    # word[0] is presumably the token of a (token, tag) pair.
                    score += liquidmetal.score(keyword, word[0]) / self.input_len

            if score < 0.7:
                continue

            # Could be increased/decreased through testing to find a more optimal value
            self.response = random.choice(getattr(responses, phrase_type))
            return True

    return False
def fuzzy_title(self, titles):
    ''' Score and remove results based on title match
    titles (list): titles to match against

    If titles is empty (an empty list, an empty tuple or None) every result
        is treated as a perfect match and gets 20 points.

    Iterates through self.results and removes any entry that does not
        fuzzy match at least one title with a score above 70.
    Results of type 'import' are always kept as a perfect match.
    Adds fuzzy_score / 5 points (so at most 20) to ['score']

    Replaces self.results with the list of kept results.

    Does not return
    '''

    logging.info('Checking title match.')

    lst = []
    # Truthiness instead of `== []`: None or an empty tuple used to fall into
    # the comparison branch and crash there.
    if not titles:
        logging.debug(
            'No titles available to compare, scoring all as perfect match.'
        )
        for result in self.results:
            result['score'] += 20
            lst.append(result)
    else:
        for result in self.results:
            if result['type'] == 'import' and result not in lst:
                logging.debug(
                    '{} is an Import, soring as a perfect match.'.format(
                        result['title']))
                result['score'] += 20
                lst.append(result)
                continue

            release = Url.normalize(result['title'])

            logging.debug('Comparing release {} with titles {}.'.format(
                result['title'], titles))
            matches = [
                lm.score(release, Url.normalize(title)) * 100
                for title in titles
            ]
            best = max(matches)

            if best > 70:
                result['score'] += int(best / 5)
                lst.append(result)
            else:
                logging.debug(
                    '{} best title match was {}%, removing search result.'.
                    format(release, best))

    self.results = lst
    logging.info('Keeping {} results.'.format(len(self.results)))
def test_score(self):
    '''liquidmetal.score must equal the mean of the expected per-character scores.

    Each case is (string, abbreviation, pattern). The pattern has one letter
    per expected per-character score, mapped to a liquidmetal constant:
        n = SCORE_NO_MATCH, m = SCORE_MATCH, t = SCORE_TRAILING,
        s = SCORE_TRAILING_BUT_STARTED, b = SCORE_BUFFER
    Both sides are rounded to 12 places before comparison.
    '''
    weight = {
        'n': liquidmetal.SCORE_NO_MATCH,
        'm': liquidmetal.SCORE_MATCH,
        't': liquidmetal.SCORE_TRAILING,
        's': liquidmetal.SCORE_TRAILING_BUT_STARTED,
        'b': liquidmetal.SCORE_BUFFER,
    }

    cases = (
        ('', '', 't'),
        ('', 'a', 'n'),
        ('a', '', 't'),
        ('a', 'toolong', 'n'),
        ('a', 'a', 'm'),
        ('a', 'b', 'n'),
        ('abc', '', 'ttt'),
        ('abc', 'a', 'mss'),
        ('abc', 'b', 'nmt'),
        ('abc', 'c', 'nnm'),
        ('abc', 'd', 'nnn'),
        ('A', 'a', 'm'),
        ('A', 'b', 'n'),
        ('FooBar', '', 'tttttt'),
        ('FooBar', 'foo', 'mmmsss'),
        ('FooBar', 'fb', 'mbbmss'),
        ('foobar', 'fb', 'mnnmss'),
        ('FooBar', 'b', 'bbbmtt'),
        ('FooBar', 'ooar', 'nmmnmm'),
        ('FooBar', 'bab', 'nnnnnn'),
        ('Foo Bar', '', 'ttttttt'),
        ('Foo Bar', 'foo', 'mmmssss'),
        ('Foo Bar', 'fb', 'mbbmmss'),
        ('Foo-Bar', 'fb', 'mbbmmss'),
        ('Foo_Bar', 'fb', 'mbbmmss'),
        ('Foo Bar', 'b', 'bbbmmtt'),
        ('Foo Bar', 'ooar', 'nmmnnmm'),
        ('Foo Bar', 'bab', 'nnnnnnn'),
        ('gnu\'s Not Unix', 'nu', 'bbbbbmmbbmmttt'),
    )

    for string, abbreviation, pattern in cases:
        score = round(liquidmetal.score(string, abbreviation), 12)
        per_char = [weight[code] for code in pattern]
        expected_score = round(sum(per_char) / len(per_char), 12)
        self.assertEqual(score, expected_score)
def test_score(self):
    """Check liquidmetal.score against the mean of hand-written per-character scores.

    Every case pairs the ``(string, abbreviation)`` arguments with the list of
    scores expected for each character of the string. The value returned by
    ``liquidmetal.score`` must equal the average of that list once both are
    rounded to 12 decimal places.
    """
    miss = liquidmetal.SCORE_NO_MATCH
    hit = liquidmetal.SCORE_MATCH
    tail = liquidmetal.SCORE_TRAILING
    begun = liquidmetal.SCORE_TRAILING_BUT_STARTED
    gap = liquidmetal.SCORE_BUFFER

    cases = [
        (("", ""), [tail]),
        (("", "a"), [miss]),
        (("a", ""), [tail]),
        (("a", "toolong"), [miss]),
        (("a", "a"), [hit]),
        (("a", "b"), [miss]),
        (("abc", ""), [tail, tail, tail]),
        (("abc", "a"), [hit, begun, begun]),
        (("abc", "b"), [miss, hit, tail]),
        (("abc", "c"), [miss, miss, hit]),
        (("abc", "d"), [miss, miss, miss]),
        (("A", "a"), [hit]),
        (("A", "b"), [miss]),
        (("FooBar", ""), [tail, tail, tail, tail, tail, tail]),
        (("FooBar", "foo"), [hit, hit, hit, begun, begun, begun]),
        (("FooBar", "fb"), [hit, gap, gap, hit, begun, begun]),
        (("foobar", "fb"), [hit, miss, miss, hit, begun, begun]),
        (("FooBar", "b"), [gap, gap, gap, hit, tail, tail]),
        (("FooBar", "ooar"), [miss, hit, hit, miss, hit, hit]),
        (("FooBar", "bab"), [miss, miss, miss, miss, miss, miss]),
        (("Foo Bar", ""), [tail, tail, tail, tail, tail, tail, tail]),
        (("Foo Bar", "foo"), [hit, hit, hit, begun, begun, begun, begun]),
        (("Foo Bar", "fb"), [hit, gap, gap, hit, hit, begun, begun]),
        (("Foo-Bar", "fb"), [hit, gap, gap, hit, hit, begun, begun]),
        (("Foo_Bar", "fb"), [hit, gap, gap, hit, hit, begun, begun]),
        (("Foo Bar", "b"), [gap, gap, gap, hit, hit, tail, tail]),
        (("Foo Bar", "ooar"), [miss, hit, hit, miss, miss, hit, hit]),
        (("Foo Bar", "bab"), [miss, miss, miss, miss, miss, miss, miss]),
        (
            ("gnu's Not Unix", "nu"),
            [gap, gap, gap, gap, gap, hit, hit, gap, gap, hit, hit, tail, tail, tail],
        ),
    ]

    for args, per_char in cases:
        actual = round(liquidmetal.score(*args), 12)
        wanted = round(sum(per_char) / len(per_char), 12)
        self.assertEqual(actual, wanted)
def _fuzzy_match(self, predb_titles, title, year): ''' Fuzzy matches title with predb titles predb_titles (list): titles in predb response title (str): title to match to rss titles year (str): year of movie release Checks for any fuzzy match over 60% Returns bool ''' movie = Url.normalize('{}.{}'.format(title, year), ascii_only=True).replace(' ', '.') for pdb in predb_titles: if year not in pdb: continue pdb = pdb.split(year)[0] + year match = lm.score(pdb.replace(' ', '.'), movie) * 100 if match > 60: logging.debug('{} matches {} at {}%'.format(pdb, movie, int(match))) return True return False
def test_non_ascii(self):
    """A non-ASCII string scored against itself must be a perfect 1.0 match."""
    # "İ" is U+0130; its str.lower() form is two code points long, which is
    # presumably the case this test guards against.
    dotted_capital_i = "İ"
    actual = liquidmetal.score(dotted_capital_i, dotted_capital_i)
    self.assertEqual(1.0, actual)
def test_score_order(target, records):
    """Suggestions must come back ordered by descending lm.score against the target."""
    suggestions = list(get_suggestions(target, records))
    scores = [lm.score(suggestion, target) for suggestion in suggestions]

    # The scores, taken in suggestion order, must already be sorted high-to-low.
    descending = sorted(scores, reverse=True)
    assert scores == descending