예제 #1
0
파일: episode.py 프로젝트: johslarsen/aesop
    def full_lookup(self, path):
        """Fill in title, id, year and genres from the Hummingbird anime API.

        This is a generator-based coroutine: each HTTP request is made with
        ``yield from get(...)``. It searches for ``self.title``, picks the
        best-ranked result (TV shows first, then movies, then specials, then
        everything else) and fetches that entry to read its genres.

        Args:
            path: Not used by this implementation.

        Returns:
            ``self._replace(...)`` with ``title``, ``media_id``, ``year`` and
            ``genres`` taken from the API. ``year`` falls back to
            ``self.year`` when the result carries no usable start date.

        Raises:
            IndexError: If the search returns no results.
        """
        params = {
            'query': self.title,
        }
        _resp, results = yield from get('https://hummingbird.me/api/v1/search/anime/', params=params)

        # Lower rank wins; unknown show types sort after the known ones.
        show_type_rank = {
            'TV': 0,
            'Movie': 1,
            'Special': 2,
        }

        def rank(entry):
            return show_type_rank.get(entry['show_type'], 3)

        # sorted() is stable, so ties keep the order the API returned them in.
        show = sorted(results, key=rank)[0]
        title = show['title']
        media_id = show['id']

        # The key may be present but null/empty, in which case slicing or
        # int() would fail; fall back to the year we already have instead.
        started_airing = show.get('started_airing')
        if started_airing:
            year = int(started_airing[:4])
        else:
            year = self.year

        _resp, details = yield from get('https://hummingbird.me/api/v1/anime/{}'.format(media_id))
        genres = [genre['name'] for genre in details['genres']]

        return self._replace(title=title, media_id=media_id, year=year, genres=genres)
예제 #2
0
def convoluted_imdb_lookup(lookup):
    """Resolve *lookup* to an IMDb id, title, year and genres.

    This function is an atrocity.

    It is a generator-based coroutine (every HTTP request is a
    ``yield from get(...)``) that tries, in order:

    1. An exact OMDb title lookup, only when ``lookup.year`` is known.
    2. An OMDb title search, with candidates ranked by Damerau-Levenshtein
       distance between ``lookup.title`` and the HTML-unescaped candidate
       title.
    3. IMDb's ``xml/find`` endpoint when the OMDb search reports failure,
       ranked the same way.

    From the ranked candidates it takes the closest one if its distance is at
    most 10, otherwise the first whose description mentions ``lookup.year``,
    otherwise whatever ``name_jumble_rumble`` returns. Genres (and the year,
    when it was not known up front) come from a follow-up OMDb request by id.

    Args:
        lookup: Record exposing ``title``, ``year`` and ``_replace``.
            ``MovieLookup`` instances are searched as movies, anything else
            as series.

    Returns:
        ``lookup._replace(...)`` with the unescaped ``title``, ``year``,
        ``media_id`` and ``genres`` (an empty list when none were found).

    Raises:
        SkipIt: If no candidate titles were found at all, or if the year is
            known but no candidate's description mentions it.
    """

    from aesop.processor.movie import MovieLookup

    media_id = None
    genres = None
    video_type = 'movie' if isinstance(lookup, MovieLookup) else 'series'

    def distance(candidate):
        # Titles may contain HTML entities; compare against the text form so
        # ranking and the acceptance threshold below agree with each other.
        return damerau_levenshtein(lookup.title, html.unescape(candidate['title']))

    # if we know the year, we'll try and get it based on name and year from
    # omdb, if we're lucky.
    if lookup.year is not None:
        log.debug("Have year, doing specific lookup ({}, {})", lookup.title, lookup.year)
        params = {
            't': lookup.title,
            'type': video_type,
            'y': str(lookup.year),
        }

        _resp, data = yield from get('http://www.omdbapi.com/', params=params)

        if data['Response'] != 'False':
            media_id = data['imdbID']
            year = int(data['Year'][:4])
            title = data['Title']

    # we couldn't find the movie, or we don't have a year available
    if media_id is None:
        params = {
            's': lookup.title,
            'type': video_type,
        }

        _resp, data = yield from get('http://www.omdbapi.com/', params=params)

        if data.get('Response', 'True') != 'False':
            # Normalise OMDb search hits to the title/id/description shape
            # that the IMDb fallback results are read with below.
            candidates = [
                dict(title=t['Title'], year=t['Year'], id=t['imdbID'], description='{} {}'.format(t['Title'], t['Year']))
                for t in data.get('Search', [])
            ]
        else:
            params = {
                'q': lookup.title,
                'tt': 'on',
                'nr': '1',
                'json': '1',
            }

            _resp, data = yield from get('http://www.imdb.com/xml/find', params=params)

            candidates = itertools.chain(
                data.get('title_popular', []),
                data.get('title_exact', []),
                data.get('title_approx', []),
                data.get('title_substring', []),
            )

        candidates = sorted(candidates, key=distance)

        if not candidates:
            # Nothing to choose from; bail out the same way as the
            # "nothing suitable" case further down instead of crashing on
            # candidates[0].
            log.debug("No candidate titles found for {!r}", lookup.title)
            raise SkipIt

        best = candidates[0]

        # damerau-levenshtein helps with names like "Agents of S.H.I.E.L.D.",
        # which we translate to "Agents of S H I E L D" to handle terrible
        # torrents named "Agents.of.Shield"
        if distance(best) <= 10:
            title = best['title']
            media_id = best['id']
        elif lookup.year is not None:
            # if we have a year but the damerau-levenshtein distance was too
            # high for the first result, we can cycle through the results based
            # on the year and take a best-guess. This works for titles that
            # have quite different names all over the place, e.g. "The Borrower
            # Arrietty" vs "The Secret World of Arrietty".

            # XXX: If we don't have a lookup year available, it would be nice
            # if we followed up by checking for unique titles, e.g. "Arrietty"
            # only appears in one movie title, so we can quite confidently say
            # that that's the correct one.
            for candidate in candidates:
                if str(lookup.year) in candidate['description']:
                    media_id = candidate['id']
                    title = candidate['title']
                    break

        if media_id is None:
            # if we reach this point, we are, by all accounts, probably wrong.
            # This will work for horribly misnamed movies, e.g. "Jurassic Park
            # - The Lost World", which will correctly map to "The Lost World:
            # Jurassic Park".
            media_id, title = name_jumble_rumble(lookup.title, candidates)

        if lookup.year is None:
            params = {
                'i': media_id,
                'p': 'full',
                'type': video_type,
            }
            _resp, data = yield from get('http://www.omdbapi.com/?', params=params)
            if data['Response'] != 'False':
                year = int(data['Year'][:4])
                genres = data['Genre'].split(', ')
            else:
                # NOTE: genres stays None here, so the genre lookup at the
                # end of the function issues the same request once more.
                year = None

        # IMDB puts the year in the description. If it's not there, we likely
        # have the wrong title, so we should cycle through again to try and
        # find it.
        elif str(lookup.year) not in best['description']:
            with FingersCrossedHandler(default_handler):
                for candidate in candidates:
                    log.debug("Prospective title: {!r}", candidate)
                    if str(lookup.year) in candidate['description']:
                        media_id = candidate['id']
                        title = candidate['title']
                        break
                else:
                    log.error("BUG: Couldn't find anything suitable for {}".format(lookup))
                    raise SkipIt
            year = lookup.year
        else:
            year = lookup.year

    if genres is None:
        params = {
            'i': media_id,
            'p': 'full',
            'type': video_type,
        }
        _resp, data = yield from get('http://www.omdbapi.com/?', params=params)
        if data['Response'] != 'False':
            genres = data['Genre'].split(', ')

    return lookup._replace(title=html.unescape(title), year=year, media_id=media_id, genres=genres or [])