Ejemplo n.º 1
0
Archivo: search.py Proyecto: zhill/quay
    def get(self, parsed_args):
        """ Return every entity and resource matching the query, best match first.

        Reads ``parsed_args["query"]``; an empty query yields an empty result list.
        Team and robot searches only run when a user is authenticated; repository
        and namespace searches always run (with ``username`` left as None for
        anonymous callers).
        """
        query = parsed_args["query"]
        if not query:
            return {"results": []}

        results = []
        username = None

        if get_authenticated_user():
            username = get_authenticated_user().username

            # Teams first; the shared set is handed to both team searches.
            seen_teams = set()
            conduct_team_search(username, query, seen_teams, results)
            conduct_admined_team_search(username, query, seen_teams, results)

            # Then robot accounts.
            conduct_robot_search(username, query, results)

        # Repositories, followed by users and organizations.
        conduct_repo_search(username, query, results)
        conduct_namespace_search(username, query, results)

        # Weight each base score by how closely the query matches the entry's name;
        # a falsy liquidmetal score is replaced by a 0.5 multiplier.
        for entry in results:
            full_name = entry["name"]
            display_name = entry.get("short_name", full_name)
            entry["score"] *= liquidmetal.score(display_name, query) or 0.5

        results.sort(key=itemgetter("score"), reverse=True)
        return {"results": results}
Ejemplo n.º 2
0
def _match_torrent_name(movie_title, movie_year, torrent_title):
    ''' Checks if movie_title and torrent_title are a good match
    movie_title (str): title of movie
    movie_year (str/int): year of movie release
    torrent_title (str): title of torrent

    Helper function for rss_sync.

    Since torrent indexers don't supply imdbid like NewzNab does we have to compare
        the titles to find a match. This should be fairly accurate since a backlog
        search uses name and year to find releases.

    Checks if the year is in the title, promptly ignores it if the year is not found.
    Then does a fuzzy title match looking for 70+ token set ratio. Fuzzy match is done
        with movie title vs torrent name split on the year. This removes release
        information and matches just on the movie title in the torrent title.

    Returns bool on match success
    '''

    if movie_year not in torrent_title:
        return False
    else:
        movie = movie_title.replace(':', '.').replace(' ', '.').lower()
        torrent = torrent_title.replace(' ', '.').replace(
            ':', '.').split(movie_year)[0].lower()
        match = lm.score(torrent, movie) * 100
        if match > 70:
            return True
        else:
            return False
Ejemplo n.º 3
0
 def check_phrase_similarity(self):
     """Pick a response when the user input is similar enough to a known phrase.

     Replaces ``self.user_input`` with the ``.tags`` of a TextBlob built from
     the lower-cased input and stores its length in ``self.input_len``. For
     every phrase of every type in PHRASE_TYPES, the liquidmetal score of each
     phrase word against each input token is accumulated, each divided by
     the number of input tokens. Words found in ``unimportant_words`` are
     skipped on both sides.

     On the first phrase whose accumulated score reaches 0.7, a random
     response of the same phrase type is stored in ``self.response``.

     Returns True if a response was chosen, False otherwise.
     """
     self.user_input = TextBlob(self.user_input.lower()).tags
     self.input_len = len(self.user_input)
     for phrase_type in PHRASE_TYPES:
         for phrase in getattr(keywords, phrase_type):
             score = 0.0
             for word in self.user_input:
                 # Each tag entry is indexed; element 0 is the token text that
                 # gets scored, so that is what must be filtered.
                 token = word[0]
                 # The previous test `word and n not in unimportant_words`
                 # parsed as `word and (n not in ...)`, so input tokens were
                 # never actually checked against unimportant_words.
                 if token in unimportant_words:
                     continue
                 for n in phrase:
                     if n not in unimportant_words:
                         score += liquidmetal.score(n, token) / self.input_len
             if score >= 0.7:  # Could be increased/ decreased through testing to find more optimal value
                 self.response = random.choice(getattr(responses, phrase_type))
                 return True
     return False
Ejemplo n.º 4
0
    def fuzzy_title(self, titles):
        ''' Score and remove results based on title match
        titles (list): titles to match against

        If titles is empty (or None) every result is treated as a perfect match
            and gets 20 points.

        Iterates through self.results and removes any entry whose best fuzzy
            match against the normalized titles is not > 70. Results of type
            'import' are always kept and get 20 points.
        Adds int(best_match / 5) points to ['score'] of every kept result.

        Replaces self.results with the kept results.

        Does not return
        '''

        logging.info('Checking title match.')

        lst = []
        # Truthiness check so None or an empty tuple behave like an empty list
        # instead of reaching max() on an empty sequence further down.
        if not titles:
            logging.debug(
                'No titles available to compare, scoring all as perfect match.'
            )
            for result in self.results:
                result['score'] += 20
                lst.append(result)
        else:
            for result in self.results:
                if result['type'] == 'import' and result not in lst:
                    logging.debug(
                        '{} is an Import, soring as a perfect match.'.format(
                            result['title']))
                    result['score'] += 20
                    lst.append(result)
                    continue
                release = Url.normalize(result['title'])

                logging.debug('Comparing release {} with titles {}.'.format(
                    result['title'], titles))
                # Only the best-scoring title decides whether the result stays.
                best = max(
                    lm.score(release, Url.normalize(title)) * 100
                    for title in titles
                )
                if best > 70:
                    result['score'] += int(best / 5)
                    lst.append(result)
                else:
                    logging.debug(
                        '{} best title match was {}%, removing search result.'.
                        format(release, best))
        self.results = lst
        logging.info('Keeping {} results.'.format(len(self.results)))
Ejemplo n.º 5
0
    def test_score(self):
        """Compare liquidmetal.score with the mean of hand-written per-character scores.

        Each case pairs the (string, abbreviation) arguments with the score
        constant expected for every character of the string; the expected
        overall score is the average of those constants. Both sides are
        rounded to 12 decimal places before comparison.
        """
        n, m, t, s, b = (
            liquidmetal.SCORE_NO_MATCH,
            liquidmetal.SCORE_MATCH,
            liquidmetal.SCORE_TRAILING,
            liquidmetal.SCORE_TRAILING_BUT_STARTED,
            liquidmetal.SCORE_BUFFER,
        )

        cases = (
            (('', ''), [t]),
            (('', 'a'), [n]),
            (('a', ''), [t]),
            (('a', 'toolong'), [n]),
            (('a', 'a'), [m]),
            (('a', 'b'), [n]),
            (('abc', ''), [t, t, t]),
            (('abc', 'a'), [m, s, s]),
            (('abc', 'b'), [n, m, t]),
            (('abc', 'c'), [n, n, m]),
            (('abc', 'd'), [n, n, n]),
            (('A', 'a'), [m]),
            (('A', 'b'), [n]),
            (('FooBar', ''), [t, t, t, t, t, t]),
            (('FooBar', 'foo'), [m, m, m, s, s, s]),
            (('FooBar', 'fb'), [m, b, b, m, s, s]),
            (('foobar', 'fb'), [m, n, n, m, s, s]),
            (('FooBar', 'b'), [b, b, b, m, t, t]),
            (('FooBar', 'ooar'), [n, m, m, n, m, m]),
            (('FooBar', 'bab'), [n, n, n, n, n, n]),
            (('Foo Bar', ''), [t, t, t, t, t, t, t]),
            (('Foo Bar', 'foo'), [m, m, m, s, s, s, s]),
            (('Foo Bar', 'fb'), [m, b, b, m, m, s, s]),
            (('Foo-Bar', 'fb'), [m, b, b, m, m, s, s]),
            (('Foo_Bar', 'fb'), [m, b, b, m, m, s, s]),
            (('Foo Bar', 'b'), [b, b, b, m, m, t, t]),
            (('Foo Bar', 'ooar'), [n, m, m, n, n, m, m]),
            (('Foo Bar', 'bab'), [n, n, n, n, n, n, n]),
            (('gnu\'s Not Unix', 'nu'), [b, b, b, b, b, m, m,
                                         b, b, m, m, t, t, t]),
        )

        for args, per_char in cases:
            actual = round(liquidmetal.score(*args), 12)
            wanted = round(sum(per_char) / len(per_char), 12)
            self.assertEqual(actual, wanted)
Ejemplo n.º 6
0
    def test_score(self):
        """Verify liquidmetal.score equals the average of the per-character scores.

        Every case lists the string, the abbreviation and a pattern with one
        letter per character of the string. The letters stand for the score
        constants: n = no match, m = match, t = trailing, s = trailing but
        started, b = buffer. Actual and expected values are rounded to 12
        decimal places before being compared.
        """
        weight = {
            "n": liquidmetal.SCORE_NO_MATCH,
            "m": liquidmetal.SCORE_MATCH,
            "t": liquidmetal.SCORE_TRAILING,
            "s": liquidmetal.SCORE_TRAILING_BUT_STARTED,
            "b": liquidmetal.SCORE_BUFFER,
        }

        cases = [
            ("", "", "t"),
            ("", "a", "n"),
            ("a", "", "t"),
            ("a", "toolong", "n"),
            ("a", "a", "m"),
            ("a", "b", "n"),
            ("abc", "", "ttt"),
            ("abc", "a", "mss"),
            ("abc", "b", "nmt"),
            ("abc", "c", "nnm"),
            ("abc", "d", "nnn"),
            ("A", "a", "m"),
            ("A", "b", "n"),
            ("FooBar", "", "tttttt"),
            ("FooBar", "foo", "mmmsss"),
            ("FooBar", "fb", "mbbmss"),
            ("foobar", "fb", "mnnmss"),
            ("FooBar", "b", "bbbmtt"),
            ("FooBar", "ooar", "nmmnmm"),
            ("FooBar", "bab", "nnnnnn"),
            ("Foo Bar", "", "ttttttt"),
            ("Foo Bar", "foo", "mmmssss"),
            ("Foo Bar", "fb", "mbbmmss"),
            ("Foo-Bar", "fb", "mbbmmss"),
            ("Foo_Bar", "fb", "mbbmmss"),
            ("Foo Bar", "b", "bbbmmtt"),
            ("Foo Bar", "ooar", "nmmnnmm"),
            ("Foo Bar", "bab", "nnnnnnn"),
            ("gnu's Not Unix", "nu", "bbbbbmmbbmmttt"),
        ]

        for string, abbreviation, pattern in cases:
            per_char = [weight[letter] for letter in pattern]
            expected_score = round(sum(per_char) / len(per_char), 12)
            score = round(liquidmetal.score(string, abbreviation), 12)
            self.assertEqual(score, expected_score)
Ejemplo n.º 7
0
    def _fuzzy_match(self, predb_titles, title, year):
        ''' Looks for a predb title that fuzzy matches the movie
        predb_titles (list): titles in predb response
        title (str): title to match to rss titles
        year (str): year of movie release

        The movie is normalized as 'title.year' with spaces turned into dots.
        Predb titles that do not contain the year are skipped; the others are
            cut off right after the year before being scored.

        Stops at the first score over 60%

        Returns bool
        '''

        wanted = Url.normalize('{}.{}'.format(title, year), ascii_only=True)
        wanted = wanted.replace(' ', '.')

        for entry in predb_titles:
            if year in entry:
                # Drop everything following the year (release info etc.).
                trimmed = entry.split(year)[0] + year
                percent = lm.score(trimmed.replace(' ', '.'), wanted) * 100
                if percent > 60:
                    logging.debug('{} matches {} at {}%'.format(trimmed, wanted, int(percent)))
                    return True
        return False
Ejemplo n.º 8
0
 def test_non_ascii(self):
     """A non-ASCII character scored against itself must give exactly 1.0."""
     dotted_capital_i = "İ"
     result = liquidmetal.score(dotted_capital_i, dotted_capital_i)
     self.assertEqual(1.0, result)
Ejemplo n.º 9
0
def test_score_order(target, records):
    """Suggestions for ``target`` must come back ordered by descending lm score."""
    suggestions = list(get_suggestions(target, records))

    scores = []
    for suggestion in suggestions:
        scores.append(lm.score(suggestion, target))

    descending = sorted(scores, reverse=True)
    assert scores == descending