Code example #1
File: test_adapter.py Project: 01-/cachecontrol
    def test_close(self):
        cache = mock.Mock(spec=DictCache)
        sess = Session()
        sess.mount('http://', CacheControlAdapter(cache))

        sess.close()
        assert cache.close.called
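The assertion above relies on requests' behavior that Session.close() closes every mounted transport adapter. A minimal sketch of the same check without mocks, using a hypothetical TracingAdapter subclass:

from requests import Session
from requests.adapters import HTTPAdapter

class TracingAdapter(HTTPAdapter):
    """Hypothetical adapter that records whether close() was called."""
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.closed = False

    def close(self):
        self.closed = True
        super().close()

adapter = TracingAdapter()
sess = Session()
sess.mount('http://', adapter)
sess.close()
assert adapter.closed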
Code example #2
File: util.py Project: stabora/nbsf
    def get_http_request(url, payload, method='POST', headers=None, use_proxy=False, use_proxy_auth=False, trust_env=True):
        try:
            session = Session()
            session.trust_env = trust_env
            session.proxies = Util.get_proxies() if use_proxy else None
            session.auth = Util.get_proxy_auth() if use_proxy_auth else None

            request = Request(
                'POST' if method not in ('GET', 'POST') else method,
                url,
                data=payload if method == 'POST' else None,
                params=payload if method == 'GET' else None,
                headers=headers
            )

            prepped = request.prepare()

            response = session.send(
                prepped,
                timeout=app.config['HTTP_REQUESTS_TIMEOUT']
            )

            session.close()

            return response, None
        except Exception as e:
            # Return a blank Response plus an error message; calling
            # raise_for_status() on a blank Response would itself fail,
            # so the error is reported via the second tuple element.
            response = Response()
            return response, 'Error making the request - Reason: {}'.format(e)
Code example #3
class OneM2MHttpTx(IoTTx):
    """Implementation of HTTP OneM2M Tx channel"""

    def __init__(self, encoder, decoder):
        super(OneM2MHttpTx, self).__init__(encoder, decoder)
        self.session = None

    def _start(self):
        self.session = Session()

    def _stop(self):
        if self.session:
            self.session.close()
        self.session = None

    def send(self, jsonprimitive):
        try:
            message = self.encoder.encode(jsonprimitive)
        except IoTDataEncodeError as e:
            return None

        rsp_message = self.session.send(message)

        rsp_primitive = None
        try:
            rsp_primitive = self.decoder.decode(rsp_message)
        except IoTDataDecodeError as e:
            return None

        return rsp_primitive
Code example #4
File: locustfile.py Project: fbarquero/locust_swPerf
 def hit_example_com(self):
     try:
         start_time = time()
         session = Session()
         http_adapter = HTTPAdapter(max_retries=0)
         session.mount('http://', http_adapter)
         session.mount('https://', http_adapter)
         session.get("http://www.example.com", timeout=30)
         # # print("Doing a task that is not a request...")
         # login = Login()
         # r = login.sw_valid_login(GC.USERNAME, GC.PASSWORD, "http://www.sowatest.com")
         stats_latency['latency'].append(time() - start_time)
         # Locust expects response_time in milliseconds
         events.request_success.fire(request_type="Transaction", name="hit_sowatest", response_time=(time() - start_time) * 1000, response_length=0)
         session.close()
         # # Assert Section
         # assert r.status_code == 200
         # assert "Access Denied" in str(html.fromstring(r.text).xpath("//title/text()"))
         # assert '<div id="blockedBanner">' in r.text
     except Exception as e:
         """
         * *request_type*: Request type method used
         * *name*: Path to the URL that was called (or override name if it was used in the call to the client)
         * *response_time*: Time in milliseconds until exception was thrown
         * *exception*: Exception instance that was thrown
         """
         events.request_failure.fire(request_type="Transaction", name="hit_sowatest", response_time=(time() - start_time) * 1000, exception=e)
Code example #5
    def send_message(self, request):
        """Transport the message to the server and return the response.

        :param request: The JSON-RPC request string.
        :return: The response (a string for requests, None for notifications).
        """
        # Prepare the session
        session = Session()
        session_request = Request(method='POST', url=self.endpoint, \
            headers=self.headers, data=request, **self.requests_kwargs)
        prepared_request = session.prepare_request(session_request)
        prepared_request.headers = dict(list(dict(
            prepared_request.headers).items()) + list(self.headers.items()))
        # Log the request
        self.log_request(request, {'http_headers': prepared_request.headers})
        # Send the message
        try:
            response = session.send(prepared_request)
        except RequestException:
            session.close()
            raise
        session.close()
        # Log the response
        self.log_response(response.text, {'http_code': response.status_code, \
            'http_reason': response.reason, 'http_headers': response.headers})
        return response.text
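The explicit close() calls on both the success and error paths can be collapsed: requests' Session supports the context-manager protocol, so a with-block closes the session however the block exits. A minimal sketch of an equivalent send, assuming the same endpoint, headers and request string as above:

from requests import Request, Session

def send_message_sketch(endpoint, headers, request_body):
    # The with-block guarantees session.close() on success and on exception.
    with Session() as session:
        prepared = session.prepare_request(
            Request(method='POST', url=endpoint, headers=headers, data=request_body))
        response = session.send(prepared)
    return response.text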
Code example #6
File: handlers.py Project: davendesai/pluggit
class PluggitHandler:
    """
    PluggitHandler is the global network handler for Pluggit. All network
    requests are routed through it in order to respect the Reddit API rules.
    OAuth requests are tracked separately, as they are rate-limited per user agent.
    """

    def __init__(self, debug = False):
        # Create logger
        self.logger = logging.getLogger('PluggitHandler')
        self.logger.setLevel(logging.INFO)
        if debug:
            self.logger.setLevel(logging.DEBUG)
        
        # Create dict { bearer: last_request_time }
        self.oauth_dict = {}

        # Required by PRAW
        self.session = Session()
        self.lock = Lock()

    def __del__(self):
        if self.session is not None:
            self.session.close()

    def request(self, request, proxies, timeout, verify, **kwargs):
        # Evict oauth_session entries more than 1hr old
        self.oauth_dict = {key: value for key, value in self.oauth_dict.items()
                           if value > (time() - (60 * 60))}

        # Get current oauth_session
        oauth_session = None
        if 'Authorization' in request.headers:
            payload = request.headers['Authorization'].split(' ')

            if payload[0] == 'bearer':
                oauth_session = payload[1]
                
        if oauth_session is not None:
            # Previously made a request
            if oauth_session in self.oauth_dict:
                # Lock to prevent multiple threads requesting from same OAUTH session
                with self.lock:
                    now = time()
                    wait_time = self.oauth_dict[oauth_session] + 2 - now

                    if wait_time > 0:
                        self.logger.debug(' SESSION: ' + oauth_session + ' SLEEPING: ' + str(wait_time))
                        now += wait_time
                        sleep(wait_time)
                        
                    self.oauth_dict[oauth_session] = now
                
            else:
                self.oauth_dict[oauth_session] = time()
     
        return self.session.send(request,
                                 proxies = proxies,
                                 timeout = timeout,
                                 allow_redirects = False, verify = verify)
Code example #7
class Flowdock:
    """Simple wrapper for Flowdock REST API."""

    API_URL = "https://api.flowdock.com"

    def __init__(self, api_key, debug=False, print_function=None):
        """Initialize Flowdock API wrapper.

        @param debug Print debug info if True
        @param print_function Use this function to print debug info. By default
        the Python builtin print is used. Mainly for passing click.echo without
        requiring click as a dependency.
        """
        self.session = Session()
        # requests accepts HTTP basic auth as a (user, pass) tuple; however,
        # Flowdock uses only the API key as the username, with no password.
        # An empty string is used here because requests coerces a None
        # password to the literal string "None".
        self.session.auth = (api_key, '')
        self.debug = debug
        self.print = print_function if print_function else print

    def get_organizations(self):
        """Get list of organizations this user has access to"""
        url = "{}/organizations".format(self.API_URL)
        if self.debug:
            self.print("Sending GET request to URL {}".format(url))
        r = self.session.get(url)
        r.raise_for_status()
        return r.json()

    def find_user_orgs(self, email):
        """Find organizations this user belongs to"""
        orgs = self.get_organizations()
        return [org for org in orgs if Flowdock.user_in_org(email, org)]

    @staticmethod
    def user_in_org(email, org):
        """Chek if user is part of organization"""
        for user in org['users']:
            if user['email'] == email:
                return True
        return False

    def delete_user_from_org(self, user, org):
        url = "{}/organizations/{}/users/{}".format(self.API_URL,
                                                    org['parameterized_name'],
                                                    user['id'])
        if self.debug:
            self.print("Sending DELETE request to url {}".format(url))

        r = self.session.delete(url)
        r.raise_for_status()

    def close(self):
        self.session.close()
Code example #8
File: thesubdb.py Project: ArthurGarnier/SickRage
class TheSubDBProvider(Provider):
    """TheSubDB Provider."""
    languages = {Language.fromthesubdb(l) for l in language_converters['thesubdb'].codes}
    required_hash = 'thesubdb'
    server_url = 'http://api.thesubdb.com/'
    subtitle_class = TheSubDBSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = ('SubDB/1.0 (subliminal/%s; https://github.com/Diaoul/subliminal)' %
                                              __short_version__)

    def terminate(self):
        self.session.close()

    def query(self, hash):
        # make the query
        params = {'action': 'search', 'hash': hash}
        logger.info('Searching subtitles %r', params)
        r = self.session.get(self.server_url, params=params, timeout=10)

        # handle subtitles not found and errors
        if r.status_code == 404:
            logger.debug('No subtitles found')
            return []
        r.raise_for_status()

        # loop over languages
        subtitles = []
        for language_code in r.text.split(','):
            language = Language.fromthesubdb(language_code)

            subtitle = self.subtitle_class(language, hash)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        return [s for s in self.query(video.hashes['thesubdb']) if s.language in languages]

    def download_subtitle(self, subtitle):
        logger.info('Downloading subtitle %r', subtitle)
        params = {'action': 'download', 'hash': subtitle.hash, 'language': subtitle.language.alpha2}
        r = self.session.get(self.server_url, params=params, timeout=10)
        r.raise_for_status()

        subtitle.content = fix_line_ending(r.content)
Code example #9
File: dbupdate.py Project: PavelShmigel/currencyX
def getCurrencys():
    result = None
    s = Session()
    try:
        resp = s.get(BASE_URL+CURRENCIES, params={'app_id': APP_ID})
        currencys = json.loads(resp.text)

        result = currencys

    except Exception as e:
        result = None
    finally:
        s.close()
        return result
Code example #10
File: dbupdate.py Project: PavelShmigel/currencyX
def getRates():
    result = None
    s = Session()
    try:
        resp = s.get(BASE_URL+LATEST_RATES, params={'app_id': APP_ID})

        raw_rates_json = json.loads(resp.text)
        result = raw_rates_json

    except Exception as e:
        result = None
    finally:
        s.close()
        return result
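Both functions above close the session in a finally block, but the return inside finally also silently discards anything the except clause does not catch (a KeyboardInterrupt, for instance). A sketch of an equivalent variant, assuming the same BASE_URL, LATEST_RATES and APP_ID constants:

from requests import Session

def get_rates_sketch():
    try:
        # The with-block closes the session automatically, and
        # resp.json() replaces json.loads(resp.text).
        with Session() as s:
            resp = s.get(BASE_URL + LATEST_RATES, params={'app_id': APP_ID})
            return resp.json()
    except Exception:
        return None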
Code example #11
File: napiprojekt.py Project: ArthurGarnier/SickRage
class NapiProjektProvider(Provider):
    """NapiProjekt Provider."""
    languages = {Language.fromalpha2(l) for l in ['pl']}
    required_hash = 'napiprojekt'
    server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'
    subtitle_class = NapiProjektSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__

    def terminate(self):
        self.session.close()

    def query(self, language, hash):
        params = {
            'v': 'dreambox',
            'kolejka': 'false',
            'nick': '',
            'pass': '',
            'napios': 'Linux',
            'l': language.alpha2.upper(),
            'f': hash,
            't': get_subhash(hash)}
        logger.info('Searching subtitle %r', params)
        r = self.session.get(self.server_url, params=params, timeout=10)
        r.raise_for_status()

        # handle subtitles not found and errors
        if r.content[:4] == b'NPc0':
            logger.debug('No subtitles found')
            return None

        subtitle = self.subtitle_class(language, hash)
        subtitle.content = r.content
        logger.debug('Found subtitle %r', subtitle)

        return subtitle

    def list_subtitles(self, video, languages):
        return [s for s in [self.query(l, video.hashes['napiprojekt']) for l in languages] if s is not None]

    def download_subtitle(self, subtitle):
        # there is no download step, content is already filled from listing subtitles
        pass
Code example #12
File: thesubdb.py Project: lastdevonearth/SickRage
class TheSubDBProvider(Provider):
    languages = {Language.fromthesubdb(l) for l in language_converters["thesubdb"].codes}
    required_hash = "thesubdb"
    server_url = "http://api.thesubdb.com/"

    def initialize(self):
        self.session = Session()
        self.session.headers = {
            "User-Agent": "SubDB/1.0 (subliminal/%s; https://github.com/Diaoul/subliminal)" % get_version(__version__)
        }

    def terminate(self):
        self.session.close()

    def query(self, hash):
        # make the query
        params = {"action": "search", "hash": hash}
        logger.info("Searching subtitles %r", params)
        r = self.session.get(self.server_url, params=params, timeout=10)

        # handle subtitles not found and errors
        if r.status_code == 404:
            logger.debug("No subtitles found")
            return []
        r.raise_for_status()

        # loop over languages
        subtitles = []
        for language_code in r.text.split(","):
            language = Language.fromthesubdb(language_code)

            subtitle = TheSubDBSubtitle(language, hash)
            logger.debug("Found subtitle %r", subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        return [s for s in self.query(video.hashes["thesubdb"]) if s.language in languages]

    def download_subtitle(self, subtitle):
        logger.info("Downloading subtitle %r", subtitle)
        params = {"action": "download", "hash": subtitle.hash, "language": subtitle.language.alpha2}
        r = self.session.get(self.server_url, params=params, timeout=10)
        r.raise_for_status()

        subtitle.content = fix_line_ending(r.content)
Code example #13
File: shooter.py Project: ArthurGarnier/SickRage
class ShooterProvider(Provider):
    """Shooter Provider."""
    languages = {Language(l) for l in ['eng', 'zho']}
    server_url = 'https://www.shooter.cn/api/subapi.php'
    subtitle_class = ShooterSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__

    def terminate(self):
        self.session.close()

    def query(self, language, filename, hash=None):
        # query the server
        params = {'filehash': hash, 'pathinfo': os.path.realpath(filename), 'format': 'json', 'lang': language.shooter}
        logger.debug('Searching subtitles %r', params)
        r = self.session.post(self.server_url, params=params, timeout=10)
        r.raise_for_status()

        # handle subtitles not found
        if r.content == b'\xff':
            logger.debug('No subtitles found')
            return []

        # parse the subtitles
        results = json.loads(r.text)
        subtitles = [self.subtitle_class(language, hash, t['Link']) for s in results for t in s['Files']]

        return subtitles

    def list_subtitles(self, video, languages):
        return [s for l in languages for s in self.query(l, video.name, video.hashes.get('shooter'))]

    def download_subtitle(self, subtitle):
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        subtitle.content = fix_line_ending(r.content)
Code example #14
File: __init__.py Project: QuantConnect/Lean
    def try_request(self, request):
        """Place a secure request and get back an object of type T.
        Args:
            request: The request object to send
        Returns:
            result: request response
        """
        timestamp = int(time())
        hash = create_secure_hash(timestamp, self.token)
        request.auth = (self.userId, hash)
        request.headers.update({'Timestamp': str(timestamp)})
        request.url = self.client + request.url

        try:
            session = Session()
            response = session.send(request.prepare())
            session.close()
            return response.json()
        except Exception:
            exception('Failed to make REST request to {0}'.format(request.url))
            return { 'success': False }
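One caveat in the pattern above: if session.send() raises, the session.close() on the next line is skipped and the except clause returns without cleanup. A sketch (not the project's code) of the same call with guaranteed cleanup:

from requests import Session

def send_prepared(request):
    # The context manager closes the session even when send() raises.
    with Session() as session:
        response = session.send(request.prepare())
    return response.json()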
Code example #15
File: fetchers.py Project: jdhacks/dated
class HTMLFetcher(BaseFetcher):
    '''
        A fetcher which uses requests to return the source HTML of a webpage
        without rendering JS. Faster but less thorough than JSFetcher.
    '''
    def setup(self, timeout):
        self.timeout = timeout
        self.session = Session()
        
    def teardown(self):
        self.session.close()
        return None
        
    def get(self, url):
        logging.info("retrieving: " + url)
        try:
            rsp = self.session.get(url, timeout=self.timeout)
            if not rsp:
                return ''
            return rsp.text
        except Exception as e:
            logging.warning("caught <{0}> while retrieving <{1}>".format(repr(e), url))
            return ''
Code example #16
File: figo.py Project: figo-connect/python-figo
    def _request_api(self, path, data=None, method="GET"):
        """Helper method for making a REST-compliant API call.

        Args:
            path: path on the server to call
            data: dictionary of data to send to the server in message body
            method: HTTP verb to use for the request

        Returns:
            the JSON-parsed result body
        """

        complete_path = self.api_endpoint + path

        session = Session()
        session.headers.update(self.headers)

        try:
            response = session.request(method, complete_path, json=data)
        finally:
            session.close()

        if 200 <= response.status_code < 300 or self._has_error(response.json()):
            if response.text == '':
                return {}
            return response.json()
        elif response.status_code in ERROR_MESSAGES:
            return {'error': ERROR_MESSAGES[response.status_code]}

        logger.warn("Querying the API failed when accessing '%s': %d",
                    complete_path,
                    response.status_code)

        return {'error': {
            'message': "internal_server_error",
            'description': "We are very sorry, but something went wrong",
            'code': 90000}}
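Closing the session in the finally block before response.json() is read works because requests loads the body eagerly when stream=False (the default). With stream=True the body is fetched lazily, so it should be consumed before the session is torn down; a short sketch with a placeholder URL:

from requests import Session

with Session() as session:
    response = session.get('https://example.com', stream=True)
    body = response.content  # consume the lazy body before the session closes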
Code example #17
class TVsubtitlesProvider(Provider):
    """TVsubtitles Provider."""
    languages = {Language('por', 'BR')} | {
        Language(l)
        for l in [
            'ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra',
            'hun', 'ita', 'jpn', 'kor', 'nld', 'pol', 'por', 'ron', 'rus',
            'spa', 'swe', 'tur', 'ukr', 'zho'
        ]
    }
    video_types = (Episode, )
    server_url = 'http://www.tvsubtitles.net/'
    subtitle_class = TVsubtitlesSubtitle

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers[
            'User-Agent'] = 'Subliminal/%s' % __short_version__

    def terminate(self):
        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if any.
        :rtype: int

        """
        # make the search
        logger.info('Searching show id for %r', series)
        r = self.session.post(self.server_url + 'search.php',
                              data={'q': series},
                              timeout=10)
        r.raise_for_status()

        # get the series out of the suggestions
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        show_id = None
        for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'):
            match = link_re.match(suggestion.text)
            if not match:
                logger.error('Failed to match %s', suggestion.text)
                continue

            if match.group('series').lower() == series.lower():
                if year is not None and int(match.group('first_year')) != year:
                    logger.debug('Year does not match')
                    continue
                show_id = int(suggestion['href'][8:-5])
                logger.debug('Found show id %d', show_id)
                break

        return show_id

    @region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME)
    def get_episode_ids(self, show_id, season):
        """Get episode ids from the show id and the season.

        :param int show_id: show id.
        :param int season: season of the episode.
        :return: episode ids per episode number.
        :rtype: dict

        """
        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id,
                    season)
        r = self.session.get(self.server_url + 'tvshow-%d-%d.html' %
                             (show_id, season),
                             timeout=10)
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over episode rows
        episode_ids = {}
        for row in soup.select('table#table5 tr'):
            # skip rows that do not have a link to the episode page
            if not row('a', href=episode_id_re):
                continue

            # extract data from the cells
            cells = row('td')
            episode = int(cells[0].text.split('x')[1])
            episode_id = int(cells[1].a['href'][8:-5])
            episode_ids[episode] = episode_id

        if episode_ids:
            logger.debug('Found episode ids %r', episode_ids)
        else:
            logger.warning('No episode ids found')

        return episode_ids

    def query(self, show_id, series, season, episode, year=None):
        # get the episode ids
        episode_ids = self.get_episode_ids(show_id, season)
        if episode not in episode_ids:
            logger.error('Episode %d not found', episode)
            return []

        # get the episode page
        logger.info('Getting the page for episode %d', episode_ids[episode])
        r = self.session.get(self.server_url +
                             'episode-%d.html' % episode_ids[episode],
                             timeout=10)
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over subtitles rows
        subtitles = []
        for row in soup.select('.subtitlen'):
            # read the item
            language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
            subtitle_id = int(row.parent['href'][10:-5])
            page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
            rip = row.find('p', title='rip').text.strip() or None
            release = row.find('h5').text.strip() or None

            subtitle = self.subtitle_class(language, page_link, subtitle_id,
                                           series, season, episode, year, rip,
                                           release)
            logger.debug('Found subtitle %s', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        # lookup show_id
        titles = [video.series] + video.alternative_series
        show_id = None
        for title in titles:
            show_id = self.search_show_id(title, video.year)
            if show_id is not None:
                break

        # query for subtitles with the show_id
        if show_id is not None:
            subtitles = [
                s for s in self.query(show_id, title, video.season,
                                      video.episode, video.year)
                if s.language in languages and s.episode == video.episode
            ]
            if subtitles:
                return subtitles
        else:
            logger.error('No show id found for %r (%r)', video.series,
                         {'year': video.year})

        return []

    def download_subtitle(self, subtitle):
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url +
                             'download-%d.html' % subtitle.subtitle_id,
                             timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            if len(zf.namelist()) > 1:
                raise ProviderError('More than one file to unzip')

            subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))
Code example #18
class Drission(object):
    """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色"""
    def __init__(self,
                 driver_or_options: Union[WebDriver, dict, Options,
                                          DriverOptions] = None,
                 session_or_options: Union[Session, dict,
                                           SessionOptions] = None,
                 ini_path: str = None,
                 proxy: dict = None):
        """初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象       \n
        :param driver_or_options: driver对象或chrome设置,Options类或设置字典
        :param session_or_options: Session对象或设置
        :param ini_path: ini文件路径
        :param proxy: 代理设置
        """
        self._session = None
        self._driver = None
        self._debugger = None
        self._proxy = proxy

        om = OptionsManager(
            ini_path
        ) if session_or_options is None or driver_or_options is None else None

        # ------------------处理session options----------------------
        if session_or_options is None:
            self._session_options = om.session_options

        else:
            # If a Session object was passed in, record it directly
            if isinstance(session_or_options, Session):
                self._session = session_or_options

            # 否则记录其配置信息
            else:
                self._session_options = _session_options_to_dict(
                    session_or_options)

        # ------------------处理driver options----------------------
        if driver_or_options is None:
            self._driver_options = om.chrome_options
            self._driver_options['driver_path'] = om.get_value(
                'paths', 'chromedriver_path')

        else:
            # If a WebDriver object was passed in, record it directly
            if isinstance(driver_or_options, WebDriver):
                self._driver = driver_or_options

            # 否则记录其配置信息
            else:
                self._driver_options = _chrome_options_to_dict(
                    driver_or_options)

    @property
    def session(self) -> Session:
        """返回Session对象,如未初始化则按配置信息创建"""
        if self._session is None:
            self._set_session(self._session_options)

            if self._proxy:
                self._session.proxies = self._proxy

        return self._session

    @property
    def driver(self) -> WebDriver:
        """返回WebDriver对象,如未初始化则按配置信息创建。         \n
        如设置了本地调试浏览器,可自动接入或打开浏览器进程。
        """
        if self._driver is None:
            if isinstance(self._driver_options, dict):
                options = _dict_to_chrome_options(self._driver_options)
            else:
                raise TypeError('Driver options invalid')

            if self._proxy:
                options.add_argument(f'--proxy-server={self._proxy["http"]}')

            driver_path = self._driver_options.get('driver_path',
                                                   None) or 'chromedriver'
            chrome_path = self._driver_options.get('binary_location',
                                                   None) or 'chrome.exe'

            # -----------If a debug port is specified and that port is not in use, start the browser process first-----------
            if options.debugger_address and _check_port(
                    options.debugger_address) is False:
                from subprocess import Popen
                port = options.debugger_address[options.debugger_address.
                                                rfind(':') + 1:]

                try:
                    self._debugger = Popen(
                        f'{chrome_path} --remote-debugging-port={port}',
                        shell=False)

                    if chrome_path == 'chrome.exe':
                        from common import get_exe_path_from_port
                        chrome_path = get_exe_path_from_port(port)

                # 启动不了进程,主动找浏览器执行文件启动
                except FileNotFoundError:
                    from DrissionPage.easy_set import _get_chrome_path
                    chrome_path = _get_chrome_path(show_msg=False)

                    if not chrome_path:
                        raise FileNotFoundError('无法找到chrome.exe路径,请手动配置。')

                    self._debugger = Popen(
                        f'"{chrome_path}" --remote-debugging-port={port}',
                        shell=False)

            # -----------创建WebDriver对象-----------
            try:
                self._driver = webdriver.Chrome(driver_path, options=options)

            # 若版本不对,获取对应chromedriver再试
            except (WebDriverException, SessionNotCreatedException):
                from .easy_set import get_match_driver
                chrome_path = None if chrome_path == 'chrome.exe' else chrome_path
                driver_path = get_match_driver(chrome_path=chrome_path,
                                               check_version=False,
                                               show_msg=False)

                if driver_path:
                    try:
                        self._driver = webdriver.Chrome(driver_path,
                                                        options=options)
                    except Exception:
                        print('Unable to start; check that the chromedriver version matches Chrome, and configure it manually.')
                        exit(0)

                # 当找不到driver且chrome_path为None时,说明安装的版本过高,改在系统路径中查找
                elif chrome_path is None and driver_path is None:
                    from DrissionPage.easy_set import _get_chrome_path
                    chrome_path = _get_chrome_path(show_msg=False,
                                                   from_ini=False,
                                                   from_regedit=False)
                    driver_path = get_match_driver(chrome_path=chrome_path,
                                                   check_version=False,
                                                   show_msg=False)

                    if driver_path:
                        options.binary_location = chrome_path
                        try:
                            self._driver = webdriver.Chrome(driver_path,
                                                            options=options)
                        except Exception:
                            print('Unable to start; check that the chromedriver version matches Chrome, and configure it manually.')
                            exit(0)
                    else:
                        print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。')
                        exit(0)
                else:
                    print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。')
                    exit(0)

            # Anti-bot-detection settings
            try:
                self._driver.execute_script(
                    'Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});'
                )
            except Exception:
                pass

            # self._driver.execute_cdp_cmd(
            #     'Page.addScriptToEvaluateOnNewDocument',
            #     {'source': 'Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});'})

        return self._driver

    @property
    def debugger_progress(self):
        """调试浏览器进程"""
        return self._debugger

    @property
    def driver_options(self) -> dict:
        """返回driver配置信息"""
        return self._driver_options

    @property
    def session_options(self) -> dict:
        """返回session配置信息"""
        return self._session_options

    @session_options.setter
    def session_options(self, options: Union[dict, SessionOptions]) -> None:
        """设置session配置                  \n
        :param options: session配置字典
        :return: None
        """
        self._session_options = _session_options_to_dict(options)
        self._set_session(self._session_options)

    @property
    def proxy(self) -> Union[None, dict]:
        """返回代理信息"""
        return self._proxy

    @proxy.setter
    def proxy(self, proxies: dict = None) -> None:
        """设置代理信息                \n
        :param proxies: 代理信息字典
        :return: None
        """
        self._proxy = proxies

        if self._session:
            self._session.proxies = proxies

        if self._driver:
            cookies = self._driver.get_cookies()
            url = self._driver.current_url
            self._driver.quit()
            self._driver = None
            self._driver = self.driver
            self._driver.get(url)

            for cookie in cookies:
                self.set_cookies(cookie, set_driver=True)

    def set_cookies(self,
                    cookies: Union[RequestsCookieJar, list, tuple, str, dict],
                    set_session: bool = False,
                    set_driver: bool = False) -> None:
        """设置cookies                                                      \n
        :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict
        :param set_session: 是否设置session的cookies
        :param set_driver: 是否设置driver的cookies
        :return: None
        """
        cookies = _cookies_to_tuple(cookies)

        for cookie in cookies:
            if cookie['value'] is None:
                cookie['value'] = ''

            # Add the cookie to the session
            if set_session:
                kwargs = {
                    x: cookie[x]
                    for x in cookie
                    if x.lower() not in ('name', 'value', 'httponly', 'expiry',
                                         'samesite')
                }

                if 'expiry' in cookie:
                    kwargs['expires'] = cookie['expiry']

                self.session.cookies.set(cookie['name'], cookie['value'],
                                         **kwargs)

            # Add the cookie to the driver
            if set_driver:
                if 'expiry' in cookie:
                    cookie['expiry'] = int(cookie['expiry'])

                try:
                    browser_domain = extract(self.driver.current_url).fqdn
                except AttributeError:
                    browser_domain = ''

                if not cookie.get('domain', None):
                    if browser_domain:
                        url = extract(browser_domain)
                        cookie_domain = f'{url.domain}.{url.suffix}'
                    else:
                        raise ValueError(
                            'There is no domain name in the cookie or the browser has not visited a URL.'
                        )

                    cookie['domain'] = cookie_domain

                else:
                    cookie_domain = cookie['domain'] if cookie['domain'][
                        0] != '.' else cookie['domain'][1:]

                if cookie_domain not in browser_domain:
                    self.driver.get(cookie_domain if cookie_domain.startswith(
                        'http://') else f'http://{cookie_domain}')

                # Avoid selenium auto-prepending '.', which would prevent correctly overwriting an existing cookie
                if cookie['domain'][0] != '.':
                    c = self.driver.get_cookie(cookie['name'])
                    if c and c['domain'] == cookie['domain']:
                        self.driver.delete_cookie(cookie['name'])

                self.driver.add_cookie(cookie)

    def _set_session(self, data: dict) -> None:
        if self._session is None:
            self._session = Session()

        attrs = [
            'headers', 'auth', 'proxies', 'hooks', 'params', 'verify', 'cert',
            'stream', 'trust_env', 'max_redirects'
        ]  # , 'adapters'

        if 'cookies' in data:
            self.set_cookies(data['cookies'], set_session=True)

        for i in attrs:
            if i in data:
                self._session.__setattr__(i, data[i])

    def cookies_to_session(self, copy_user_agent: bool = False) -> None:
        """把driver对象的cookies复制到session对象    \n
        :param copy_user_agent: 是否复制ua信息
        :return: None
        """
        if copy_user_agent:
            self.user_agent_to_session(self.driver, self.session)

        self.set_cookies(self.driver.get_cookies(), set_session=True)

    def cookies_to_driver(self, url: str) -> None:
        """把session对象的cookies复制到driver对象  \n
        :param url: 作用域
        :return: None
        """
        browser_domain = extract(self.driver.current_url).fqdn
        ex_url = extract(url)

        if ex_url.fqdn not in browser_domain:
            self.driver.get(url)

        domain = f'{ex_url.domain}.{ex_url.suffix}'

        cookies = []
        for cookie in self.session.cookies:
            if cookie.domain == '':
                cookie.domain = domain

            if domain in cookie.domain:
                cookies.append(cookie)

        self.set_cookies(cookies, set_driver=True)

    def user_agent_to_session(self,
                              driver: WebDriver = None,
                              session: Session = None) -> None:
        """把driver的user-agent复制到session    \n
        :param driver: 来源driver对象
        :param session: 目标session对象
        :return: None
        """
        driver = driver or self.driver
        session = session or self.session
        selenium_user_agent = driver.execute_script(
            "return navigator.userAgent;")
        session.headers.update({"User-Agent": selenium_user_agent})

    def close_driver(self) -> None:
        """关闭driver和浏览器"""
        if self._driver:
            self._driver.quit()
            self._driver = None

    def close_session(self) -> None:
        """关闭session"""
        if self._session:
            self._session.close()
            self._session = None

    def close(self) -> None:
        """关闭session、driver和浏览器"""
        if self._driver:
            self.close_driver()

        if self._session:
            self.close_session()

    def __del__(self):
        """关闭对象时关闭浏览器和Session"""
        try:
            self.close()
        except ImportError:
            pass
Code example #19
File: subscenter.py Project: xelgand/SickChill
class SubsCenterProvider(Provider):
    """SubsCenter Provider."""
    languages = {Language.fromalpha2(l) for l in ['he']}
    server_url = 'http://www.subscenter.info/he/'

    def __init__(self, username=None, password=None):
        if (username is not None and password is None) or (username is None and password is not None):
            raise ConfigurationError('Username and password must be specified')

        self.session = None
        self.username = username
        self.password = password
        self.logged_in = False

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(
            __short_version__)

        # login
        if self.username is not None and self.password is not None:
            logger.debug('Logging in')
            url = self.server_url + 'subscenter/accounts/login/'

            # retrieve CSRF token
            self.session.get(url)
            csrf_token = self.session.cookies['csrftoken']

            # actual login
            data = {
                'username': self.username,
                'password': self.password,
                'csrfmiddlewaretoken': csrf_token
            }
            r = self.session.post(url, data, allow_redirects=False, timeout=10)

            if r.status_code != 302:
                raise AuthenticationError(self.username)

            logger.info('Logged in')
            self.logged_in = True

    def terminate(self):
        # logout
        if self.logged_in:
            logger.info('Logging out')
            r = self.session.get(self.server_url +
                                 'subscenter/accounts/logout/',
                                 timeout=10)
            r.raise_for_status()
            logger.info('Logged out')
            self.logged_in = False

        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_url_titles(self, title):
        """Search the URL titles by kind for the given `title`.

        :param str title: title to search for.
        :return: the URL titles by kind.
        :rtype: collections.defaultdict

        """
        # make the search
        logger.info('Searching title name for %r', title)
        r = self.session.get(self.server_url + 'subtitle/search/',
                             params={'q': title},
                             timeout=10)
        r.raise_for_status()

        # check for redirections
        if r.history and all([h.status_code == 302 for h in r.history]):
            logger.debug('Redirected to the subtitles page')
            links = [r.url]
        else:
            # get the suggestions (if needed)
            soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
            links = [
                link.attrs['href']
                for link in soup.select('#processes div.generalWindowTop a')
            ]
            logger.debug('Found %d suggestions', len(links))

        url_titles = defaultdict(list)
        for link in links:
            parts = link.split('/')
            url_titles[parts[-3]].append(parts[-2])

        return url_titles

    def query(self, title, season=None, episode=None):
        # search for the url title
        url_titles = self._search_url_titles(title)

        # episode
        if season and episode:
            if 'series' not in url_titles:
                logger.error('No URL title found for series %r', title)
                return []
            url_title = url_titles['series'][0]
            logger.debug('Using series title %r', url_title)
            url = self.server_url + 'cst/data/series/sb/{}/{}/{}/'.format(
                url_title, season, episode)
            page_link = self.server_url + 'subtitle/series/{}/{}/{}/'.format(
                url_title, season, episode)
        else:
            if 'movie' not in url_titles:
                logger.error('No URL title found for movie %r', title)
                return []
            url_title = url_titles['movie'][0]
            logger.debug('Using movie title %r', url_title)
            url = self.server_url + 'cst/data/movie/sb/{}/'.format(url_title)
            page_link = self.server_url + 'subtitle/movie/{}/'.format(
                url_title)

        # get the list of subtitles
        logger.debug('Getting the list of subtitles')
        r = self.session.get(url)
        r.raise_for_status()
        results = json.loads(r.text)

        # loop over results
        subtitles = {}
        for language_code, language_data in results.items():
            for quality_data in language_data.values():
                for quality, subtitles_data in quality_data.items():
                    for subtitle_item in subtitles_data.values():
                        # read the item
                        language = Language.fromalpha2(language_code)
                        hearing_impaired = bool(
                            subtitle_item['hearing_impaired'])
                        subtitle_id = subtitle_item['id']
                        subtitle_key = subtitle_item['key']
                        subtitle_version = subtitle_item['h_version']
                        downloaded = subtitle_item['downloaded']
                        release = subtitle_item['subtitle_version']

                        # add the release and increment downloaded count if we already have the subtitle
                        if subtitle_id in subtitles:
                            logger.debug(
                                'Found additional release %r for subtitle %d',
                                release, subtitle_id)
                            bisect.insort_left(subtitles[subtitle_id].releases,
                                               release)  # deterministic order
                            subtitles[subtitle_id].downloaded += downloaded
                            continue

                        # otherwise create it
                        subtitle = SubsCenterSubtitle(
                            language, hearing_impaired, page_link, title,
                            season, episode, title, subtitle_id, subtitle_key,
                            subtitle_version, downloaded, [release])
                        logger.debug('Found subtitle %r', subtitle)
                        subtitles[subtitle_id] = subtitle

        return subtitles.values()

    def list_subtitles(self, video, languages):
        season = episode = None
        title = video.title

        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode

        return [
            s for s in self.query(title, season, episode)
            if s.language in languages
        ]

    def download_subtitle(self, subtitle):
        # download
        url = self.server_url + 'subtitle/download/{}/{}/'.format(
            subtitle.language.alpha2, subtitle.subtitle_id)
        params = {'v': subtitle.subtitle_version, 'key': subtitle.subtitle_key}
        r = self.session.get(url,
                             params=params,
                             headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        # open the zip
        try:
            with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
                # remove some filenames from the namelist
                namelist = [n for n in zf.namelist() if not n.endswith('.txt')]
                if len(namelist) > 1:
                    raise ProviderError('More than one file to unzip')

                subtitle.content = fix_line_ending(zf.read(namelist[0]))
        except zipfile.BadZipfile:
            # if no zip file was retrieved, daily downloads limit has exceeded
            raise ProviderError('Daily limit exceeded')
Code example #20
class SubsCenterProvider(Provider):
    languages = {Language.fromalpha2(l) for l in ['he']}
    server = 'http://subscenter.cinemast.com/he/'

    def __init__(self, username=None, password=None):
        if (username is not None and password is None) or (username is None and password is not None):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False

    def initialize(self):
        self.session = Session()
        self.session.headers = {
            'User-Agent': 'Subliminal/%s' % get_version(__version__)
        }

        # login
        if self.username is not None and self.password is not None:
            logger.debug('Logging in')
            url = self.server + 'subscenter/accounts/login/'

            # retrieve CSRF token
            self.session.get(url)
            csrf_token = self.session.cookies['csrftoken']

            # actual login
            data = {
                'username': self.username,
                'password': self.password,
                'csrfmiddlewaretoken': csrf_token
            }
            r = self.session.post(url, data, allow_redirects=False, timeout=10)

            if r.status_code != 302:
                raise AuthenticationError(self.username)

            logger.info('Logged in')
            self.logged_in = True

    def terminate(self):
        # logout
        if self.logged_in:
            logger.info('Logging out')
            r = self.session.get(self.server + 'subscenter/accounts/logout/',
                                 timeout=10)
            r.raise_for_status()
            logger.info('Logged out')
            self.logged_in = False

        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_url_title(self, title, kind):
        """Search the URL title for the given `title`.

        :param str title: title to search for.
        :param str kind: kind of the title, ``movie`` or ``series``.
        :return: the URL version of the title.
        :rtype: str or None

        """
        # make the search
        logger.info('Searching title name for %r', title)
        r = self.session.get(self.server + 'subtitle/search/',
                             params={'q': title},
                             allow_redirects=False,
                             timeout=10)
        r.raise_for_status()

        # if redirected, get the url title from the Location header
        if r.is_redirect:
            parts = r.headers['Location'].split('/')

            # check kind
            if parts[-3] == kind:
                return parts[-2]

            return None

        # otherwise, get the first valid suggestion
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        suggestions = soup.select('#processes div.generalWindowTop a')
        logger.debug('Found %d suggestions', len(suggestions))
        for suggestion in suggestions:
            parts = suggestion.attrs['href'].split('/')

            # check kind
            if parts[-3] == kind:
                return parts[-2]

    def query(self, series=None, season=None, episode=None, title=None):
        # set the correct parameters depending on the kind
        if series and season and episode:
            url_series = self._search_url_title(series, 'series')
            url = self.server + 'cinemast/data/series/sb/{}/{}/{}/'.format(
                url_series, season, episode)
            page_link = self.server + 'subtitle/series/{}/{}/{}/'.format(
                url_series, season, episode)
        elif title:
            url_title = self._search_url_title(title, 'movie')
            url = self.server + 'cinemast/data/movie/sb/{}/'.format(url_title)
            page_link = self.server + 'subtitle/movie/{}/'.format(url_title)
        else:
            raise ValueError('One or more parameters are missing')

        # get the list of subtitles
        logger.debug('Getting the list of subtitles')
        r = self.session.get(url)
        r.raise_for_status()
        results = json.loads(r.text)

        # loop over results
        subtitles = {}
        for language_code, language_data in results.items():
            for quality_data in language_data.values():
                for quality, subtitles_data in quality_data.items():
                    for subtitle_item in subtitles_data.values():
                        # read the item
                        language = Language.fromalpha2(language_code)
                        hearing_impaired = bool(
                            subtitle_item['hearing_impaired'])
                        subtitle_id = subtitle_item['id']
                        subtitle_key = subtitle_item['key']
                        downloaded = subtitle_item['downloaded']
                        release = subtitle_item['subtitle_version']

                        # add the release and increment downloaded count if we already have the subtitle
                        if subtitle_id in subtitles:
                            logger.debug(
                                'Found additional release %r for subtitle %d',
                                release, subtitle_id)
                            bisect.insort_left(subtitles[subtitle_id].releases,
                                               release)  # deterministic order
                            subtitles[subtitle_id].downloaded += downloaded
                            continue

                        # otherwise create it
                        subtitle = SubsCenterSubtitle(
                            language, hearing_impaired, page_link, series,
                            season, episode, title, subtitle_id, subtitle_key,
                            downloaded, [release])
                        logger.debug('Found subtitle %r', subtitle)
                        subtitles[subtitle_id] = subtitle

        return subtitles.values()

    def list_subtitles(self, video, languages):
        series = None
        season = None
        episode = None
        title = video.title

        if isinstance(video, Episode):
            series = video.series
            season = video.season
            episode = video.episode

        return [
            s for s in self.query(series, season, episode, title)
            if s.language in languages
        ]

    def download_subtitle(self, subtitle):
        # download
        url = self.server + 'subtitle/download/{}/{}/'.format(
            subtitle.language.alpha2, subtitle.subtitle_id)
        params = {'v': subtitle.releases[0], 'key': subtitle.subtitle_key}
        r = self.session.get(url,
                             params=params,
                             headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        # open the zip
        with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
            # remove some filenames from the namelist
            namelist = [n for n in zf.namelist() if not n.endswith('.txt')]
            if len(namelist) > 1:
                raise ProviderError('More than one file to unzip')

            subtitle.content = fix_line_ending(zf.read(namelist[0]))
Code example #21
class OAuthClient(object):
    """
    Cloud service only.

    Utility to create a custom OAuth client.

    To connect and authenticate to the Oracle NoSQL Database Cloud Service, a
    client needs to acquire an access token from Oracle Identity Cloud Service
    (IDCS). As a prerequisite, a custom OAuth client named *NoSQLClient* must
    be created first using this utility. This custom client needs to be created
    only once for a tenant.

    This utility needs a valid access token in a token file that can be
    downloaded from the IDCS admin console. After logging into the IDCS admin
    console, choose *Applications* from the button on the top left. Find the
    Application named *ANDC*, click the button *Generate Access Token*, in the
    pop-up window, pick *Invoke Identity Cloud Service APIs* under
    *Customized Scopes*. Click on *Download Token* and a token file will be
    generated and downloaded. Note that this token has a lifetime of one hour.

    After the token file has been downloaded, run this utility to complete the
    OAuth Client creation:

    .. code-block:: shell

      python oauth_client.py -create -idcs_url <tenant-specific IDCS URL> \
-token <token file>

    The tenant-specific IDCS URL is the IDCS host assigned to the tenant. After
    logging into the IDCS admin console, copy the host of the IDCS admin console
    URL. For example, the format of the admin console URL is
    "https\://{tenantId}.identity.oraclecloud.com/ui/v1/adminconsole". The
    "https\://{tenantId}.identity.oraclecloud.com" portion is the required
    parameter.

    After creation, the utility will print out *NoSQLClient is created*.
    The OAuth client id and secret will also be printed out. A credentials file
    template *credentials.temp* with the client id and secret will be generated
    in the working directory by default. Use *-credsdir* to specify a different
    directory.

    This utility can also be used to delete the custom OAuth client in case the
    creation process fails unexpectedly.

    .. code-block:: shell

      python oauth_client.py -delete -idcs_url <tenant-specific IDCS URL> \
-token <token file>

    In addition, this utility can be used to verify that the OAuth client is
    configured properly, for example:

    .. code-block:: shell

      python oauth_client.py -verify -idcs_url <tenant-specific IDCS URL> \
-token <token file>
    """
    #
    # NOTE: the docstring above is the user-facing documentation; the notes
    # below describe the implementation. This custom OAuth client must be
    # created with a specified name. The client must:
    # - enable password, client_credentials as allowed grants
    # - have PSM and NDCS fully-qualified scopes (FQS) as allowed scopes
    # - have ANDC_FullAccessRole
    #
    # The OAuth client creation steps are:
    # 1. Find PSM and NDCS primary audiences from IDCS
    # 2. Build PSM and NDCS FQS with primary audiences and put them in the
    #    OAuth client JSON payload
    # 3. POST <idcs_url>/admin/v1/Apps with OAuth client JSON payload
    # 4. Find role ID of ANDC_FullAccessRole
    # 5. Grant ANDC_FullAccessRole to created custom OAuth client
    #

    # Default OAuth client name
    _DEFAULT_NAME = 'NoSQLClient'
    # Default credentials template file name
    _CREDS_TMP = 'credentials.temp'
    # Endpoint with filter used to get PSM App
    _PSM_APP_EP = (Utils.APP_ENDPOINT +
                   '?filter=serviceTypeURN+eq+%22PSMResourceTenatApp%22')
    # Endpoint with filter used to get ANDC App
    _ANDC_APP_EP = (Utils.APP_ENDPOINT + '?filter=serviceTypeURN+eq+%22' +
                    'ANDC_ServiceEntitlement%22+and+isOAuthResource+eq+true')
    # Endpoint with filter used to get role ID of ANDC_FullAccessRole
    _ANDC_ROLE_EP = (Utils.ROLE_ENDPOINT +
                     '?filter=displayName+eq+%22ANDC_FullAccessRole%22')
    # Endpoint with filter used to get oauth client
    _CLIENT_EP = Utils.APP_ENDPOINT + '?filter=displayName+eq+%22'
    # JSON used to create custom OAuth client
    _CLIENT = (
        '{{"displayName": "{0}","isOAuthClient": true,' +
        '"isOAuthResource": false,"isUnmanagedApp": true,"active": true,' +
        '"description": "Custom OAuth Client for application access to ' +
        'NoSQL Database Cloud Service","clientType": "confidential",' +
        '"allowedGrants": ["password", "client_credentials"]' +
        ',"trustScope": "Explicit","allowedScopes": [' +
        '{{"fqs": "{1}"}},{{"fqs": "{2}"}}],' +
        '"schemas": ["urn:ietf:params:scim:schemas:oracle:idcs:App"],' +
        '"basedOnTemplate": {{"value": "CustomWebAppTemplateId"}}}}')
    # JSON used to grant role to client
    _GRANT = (
        '{{"app": {{"value": "{0}"}},"entitlement": {{' +
        '"attributeName": "appRoles","attributeValue": "{1}"}},' +
        '"grantMechanism": "ADMINISTRATOR_TO_APP",' +
        '"grantee": {{"value": "{2}","type": "App"}},' +
        '"schemas": ["urn:ietf:params:scim:schemas:oracle:idcs:Grant"]}}')
    _DEACTIVATE = (
        '{"active": false,"schemas": [' +
        '"urn:ietf:params:scim:schemas:oracle:idcs:AppStatusChanger"]}')

    # Main argument flags
    _IDCS_URL_FLAG = '-idcs_url'
    _TOKEN_FILE_FLAG = '-token'
    _CREATE_FLAG = '-create'
    _DELETE_FLAG = '-delete'
    _VERIFY_FLAG = '-verify'
    _NAME_FLAG = '-name'
    _DIR_FLAG = '-credsdir'
    _TIMEOUT_FLAG = '-timeout'
    _VERBOSE_FLAG = '-verbose'

    def __init__(self):
        self._parse_args()
        url = urlparse(self._idcs_url)
        self._host = url.hostname
        # logger used for HTTP request logging
        self._logger = self._get_logger()
        self._logutils = LogUtils(self._logger)
        self._sess = Session()
        self._request_utils = RequestUtils(self._sess, self._logutils)

    def execute_commands(self):
        # noinspection PyBroadException
        try:
            if self._delete:
                self._do_delete()
            elif self._create:
                self._do_create()
            else:
                errors = list()
                self._do_verify(errors)
                if len(errors) != 0:
                    print('Verification failed: ')
                    for err in errors:
                        print(err)
        except Exception:
            print(format_exc())
        finally:
            if self._sess is not None:
                self._sess.close()

    def _add_app(self, auth, payload):
        # Add the custom OAuth client
        response = self._request_utils.do_post_request(
            self._idcs_url + Utils.APP_ENDPOINT,
            Utils.scim_headers(self._host, auth), payload, self._timeout_ms)
        self._check_not_none(response, 'response of adding OAuth client')
        response_code = response.get_status_code()
        content = response.get_content()
        if response_code == codes.conflict:
            raise IllegalStateException(
                'OAuth Client ' + self._name +
                ' already exists. To recreate, run with ' +
                OAuthClient._DELETE_FLAG + '. To verify if the ' +
                'existing client is configured correctly, run with ' +
                OAuthClient._VERIFY_FLAG)
        elif response_code >= codes.multiple_choices:
            OAuthClient._idcs_errors(response, 'Adding custom client')
        app_id = 'id'
        oauth_id = 'name'
        secret = 'clientSecret'
        app_id_value = Utils.get_field(content, app_id)
        oauth_id_value = Utils.get_field(content, oauth_id)
        secret_value = Utils.get_field(content, secret)
        if (app_id_value is None or oauth_id_value is None
                or secret_value is None):
            raise IllegalStateException(
                'Unable to find {0} or {1} or {2} in {3}'.format(
                    app_id, oauth_id, secret, content))
        return OAuthClient.Client(app_id_value, oauth_id_value, secret_value)

    def _check_not_none(self, response, action):
        if response is None:
            raise IllegalStateException(
                'Error ' + action + ' from Oracle Identity Cloud Service, ' +
                'no response')

    def _creds_template(self, client_id, secret):
        file_dir = ((path.abspath(path.dirname(argv[0]))
                     if self._temp_file_dir is None else self._temp_file_dir) +
                    sep + OAuthClient._CREDS_TMP)
        if path.exists(file_dir):
            remove(file_dir)
        with open(file_dir, 'w') as f:
            if client_id is not None:
                f.write(PropertiesCredentialsProvider.CLIENT_ID_PROP + '=' +
                        client_id + '\n')
                f.write(PropertiesCredentialsProvider.CLIENT_SECRET_PROP +
                        '=' + secret + '\n')
            f.write(PropertiesCredentialsProvider.USER_NAME_PROP + '=\n')
            f.write(PropertiesCredentialsProvider.PWD_PROP + '=\n')
        return file_dir
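
    # For illustration only: the generated template is a plain properties
    # file with one "key=value" line per entry. The key names come from
    # PropertiesCredentialsProvider (shown symbolically here); client id and
    # secret are filled in, user name and password are left blank for the
    # user to complete:
    #
    #   <CLIENT_ID_PROP>=<client id>
    #   <CLIENT_SECRET_PROP>=<client secret>
    #   <USER_NAME_PROP>=
    #   <PWD_PROP>=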

    def _deactivate_app(self, auth, app_id):
        # Deactivate OAuth client
        response = self._request_utils.do_put_request(
            self._idcs_url + Utils.STATUS_ENDPOINT + '/' + app_id,
            Utils.scim_headers(self._host, auth), OAuthClient._DEACTIVATE,
            self._timeout_ms)
        self._check_not_none(response, 'response of deactivating OAuth client')
        if codes.ok <= response.get_status_code() < codes.multiple_choices:
            return
        OAuthClient._idcs_errors(response,
                                 'deactivating OAuth client ' + self._name)

    def _do_create(self):
        self._output('Creating OAuth Client ' + self._name)
        try:
            # Find PSM and ANDC fqs
            auth = 'Bearer ' + self._get_bootstrap_token()
            psm_fqs = self._get_psm_audience(auth) + Utils.PSM_SCOPE
            andc = self._get_andc_info(auth)
            andc_fqs = andc.audience + AccessTokenProvider.SCOPE
            self._log_verbose('Found scopes ' + psm_fqs + ', ' + andc_fqs)
            # Add custom client
            add_app = OAuthClient._CLIENT.format(self._name, psm_fqs, andc_fqs)
            client_info = self._add_app(auth, add_app)
            self._log_verbose('Added OAuth client ' + self._name)
            # Find ANDC role id
            role_id = self._get_id(auth,
                                   self._idcs_url + OAuthClient._ANDC_ROLE_EP,
                                   'role')
            self._log_verbose('Found role id ' + role_id)
            # Grant ANDC_FullAccessRole to custom client
            grant = OAuthClient._GRANT.format(andc.app_id, role_id,
                                              client_info.app_id)
            self._grant_role(auth, grant)
            self._log_verbose('Granted role to OAuth client')
            self._output(self._name + ' is created\nClient ID: ' +
                         client_info.oauth_id + '\nClient secret: ' +
                         client_info.secret)
            creds_path = self._creds_template(client_info.oauth_id,
                                              client_info.secret)
            self._output('Credential template file ' + creds_path)
        except Exception as e:
            self._output('Failed to create OAuth client ' + self._name)
            raise e

    def _do_delete(self):
        self._output('Deleting OAuth Client ' + self._name)
        try:
            auth = 'Bearer ' + self._get_bootstrap_token()
            # Find OAuth client AppId
            app_id = self._get_id(
                auth,
                self._idcs_url + OAuthClient._CLIENT_EP + self._name + '%22',
                'client')
            self._log_verbose('Found OAuth client AppId: ' + app_id)
            # Deactivate the OAuth client
            self._deactivate_app(auth, app_id)
            self._log_verbose('OAuth client deactivated')
            # Remove the OAuth client
            self._remove_client(auth, app_id)
            self._output(self._name + ' is deleted')
        except Exception as e:
            self._output('Failed to remove OAuth client ' + self._name)
            raise e

    def _do_verify(self, errors):
        self._output('Verifying OAuth Client ' + self._name)
        try:
            auth = 'Bearer ' + self._get_bootstrap_token()
            response = self._request_utils.do_get_request(
                self._idcs_url + OAuthClient._CLIENT_EP + self._name + '%22',
                Utils.scim_headers(self._host, auth), self._timeout_ms)
            self._check_not_none(response, 'client metadata')
            response_code = response.get_status_code()
            content = response.get_content()
            if response_code >= codes.multiple_choices:
                OAuthClient._idcs_errors(response,
                                         'Getting client ' + self._name)
            grants = Utils.get_field(content, 'allowedGrants')
            if grants is None:
                # No results in response
                raise IllegalStateException(
                    'OAuth Client ' + self._name + ' doesn\'t exist, or the ' +
                    'token file is invalid; the user who downloads the token ' +
                    'must have the Identity Domain Administrator role')
            # Verify if client has required grants
            self._verify_grants(grants, errors)
            # Verify if client has PSM and ANDC FQS
            self._verify_scopes(
                Utils.get_field(content, 'allowedScopes', 'fqs'), errors)
            # Verify if client has ANDC role
            self._verify_role(
                Utils.get_field(content, 'grantedAppRoles', 'display'), errors)
            if len(errors) > 0:
                return
            self._output('Verification succeeded')
        except Exception as e:
            self._output('Verification failed for OAuth client ' + self._name)
            raise e

    def _get_andc_info(self, auth):
        # Get App ANDC metadata from IDCS
        response = self._request_utils.do_get_request(
            self._idcs_url + OAuthClient._ANDC_APP_EP,
            Utils.scim_headers(self._host, auth), self._timeout_ms)
        self._check_not_none(response, 'getting service metadata')
        content = response.get_content()
        if response.get_status_code() >= codes.multiple_choices:
            OAuthClient._idcs_errors(response, 'Getting service metadata')
        audience = 'audience'
        app_id = 'id'
        audience_value = Utils.get_field(content, audience)
        app_id_value = Utils.get_field(content, app_id)
        if audience_value is None or app_id_value is None:
            raise IllegalStateException(
                'Unable to find {0} or {1} in {2}'.format(
                    audience, app_id, content))
        return OAuthClient.ANDC(app_id_value, audience_value)

    def _get_bootstrap_token(self):
        # Read access token from given file
        with open(self._at_file, 'r') as at_file:
            content = at_file.read()
        bootstrap_token = loads(content)
        field = 'app_access_token'
        app_access_token = bootstrap_token.get(field)
        if app_access_token is None:
            raise IllegalStateException(
                'Access token file contains invalid value: ' + content)
        return app_access_token

    def _get_id(self, auth, url, resource):
        response = self._request_utils.do_get_request(
            url, Utils.scim_headers(self._host, auth), self._timeout_ms)
        self._check_not_none(response, 'getting ' + resource + ' id')
        if response.get_status_code() >= codes.multiple_choices:
            OAuthClient._idcs_errors(response, 'Getting id of ' + resource)
        return str(
            Utils.get_field(response.get_content(), 'id', allow_none=False))

    def _get_logger(self):
        """
        Returns the logger used for OAuthClient.
        """
        logger = getLogger(self.__class__.__name__)
        if self._verbose:
            logger.setLevel(INFO)
        else:
            logger.setLevel(WARNING)
        log_dir = (path.abspath(path.dirname(argv[0])) + sep + 'logs')
        if not path.exists(log_dir):
            mkdir(log_dir)
        logger.addHandler(FileHandler(log_dir + sep + 'oauth.log'))
        return logger

    def _get_psm_audience(self, auth):
        response = self._request_utils.do_get_request(
            self._idcs_url + OAuthClient._PSM_APP_EP,
            Utils.scim_headers(self._host, auth), self._timeout_ms)
        self._check_not_none(response, 'getting account metadata')
        if response.get_status_code() >= codes.multiple_choices:
            OAuthClient._idcs_errors(response, 'Getting account metadata')
        return str(
            Utils.get_field(response.get_content(),
                            'audience',
                            allow_none=False))

    def _grant_role(self, auth, payload):
        # Grant ANDC_FullAccessRole to OAuth client
        response = self._request_utils.do_post_request(
            self._idcs_url + Utils.GRANT_ENDPOINT,
            Utils.scim_headers(self._host, auth), payload, self._timeout_ms)
        self._check_not_none(response, 'response of granting role')
        if codes.ok <= response.get_status_code() < codes.multiple_choices:
            return
        OAuthClient._idcs_errors(response, 'Granting required role to client')

    def _log_verbose(self, msg):
        if self._verbose:
            print(msg)

    def _output(self, msg):
        print(msg)

    def _parse_args(self):
        parser = ArgumentParser(prog='OAuthClient')
        parser.add_argument(OAuthClient._IDCS_URL_FLAG,
                            required=True,
                            help='The tenant-specific IDCS URL.',
                            metavar='<tenant-specific IDCS URL>')
        parser.add_argument(
            OAuthClient._TOKEN_FILE_FLAG,
            required=True,
            help='The path of the token file downloaded from the IDCS '
                 'admin console.',
            metavar='<access token file path>')
        parser.add_argument(OAuthClient._NAME_FLAG,
                            default=OAuthClient._DEFAULT_NAME,
                            help='The OAuth Client name.',
                            metavar='<client name> default: NoSQLClient')
        parser.add_argument(
            OAuthClient._DIR_FLAG,
            help='The directory for generating the credentials file template.',
            metavar=('<credentials template directory path> ' +
                     'default: current dir'))
        parser.add_argument(OAuthClient._TIMEOUT_FLAG,
                            type=int,
                            default=Utils.DEFAULT_TIMEOUT_MS,
                            help='The request timeout in milliseconds.',
                            metavar='<request timeout> default: 12000 ms')
        parser.add_argument(OAuthClient._CREATE_FLAG,
                            action='store_true',
                            help='To create the OAuth Client.')
        parser.add_argument(OAuthClient._DELETE_FLAG,
                            action='store_true',
                            help='To delete the OAuth Client.')
        parser.add_argument(OAuthClient._VERIFY_FLAG,
                            action='store_true',
                            help='To verify the OAuth Client.')
        parser.add_argument(OAuthClient._VERBOSE_FLAG,
                            action='store_true',
                            help='To log verbose information.')

        args = parser.parse_args()
        self._idcs_url = args.idcs_url
        self._at_file = args.token
        self._name = args.name
        self._temp_file_dir = args.credsdir
        self._timeout_ms = args.timeout
        self._create = args.create
        self._delete = args.delete
        self._verify = args.verify
        self._verbose = args.verbose

        if not (self._create or self._delete or self._verify):
            parser.error('Missing required argument ' +
                         OAuthClient._CREATE_FLAG + ' | ' +
                         OAuthClient._DELETE_FLAG + ' | ' +
                         OAuthClient._VERIFY_FLAG)

    def _remove_client(self, auth, app_id):
        response = self._request_utils.do_delete_request(
            self._idcs_url + Utils.APP_ENDPOINT + '/' + app_id,
            Utils.scim_headers(self._host, auth), self._timeout_ms)
        self._check_not_none(response, 'response of deleting OAuth client')
        if codes.ok <= response.get_status_code() < codes.multiple_choices:
            return
        OAuthClient._idcs_errors(response,
                                 'removing OAuth client ' + self._name)

    def _verify_grants(self, grants, errors):
        self._log_verbose('OAuth client allowed grants: ' + str(grants))
        match = 0
        for grant in grants:
            if (grant.lower() == 'password'
                    or grant.lower() == 'client_credentials'):
                match += 1
        if match != 2:
            errors.append(
                'Missing required allowed grants, require Resource ' +
                'Owner and Client Credentials')
        else:
            self._log_verbose('Grants verification succeeded')

    def _verify_role(self, roles, errors):
        if roles is None:
            raise IllegalStateException('OAuth client ' + self._name +
                                        ' doesn\'t have roles')
        self._log_verbose('OAuth client allowed roles: ' + str(roles))
        match = 0
        for role in roles:
            if role == 'ANDC_FullAccessRole':
                match += 1
        if match != 1:
            errors.append('Missing required role ANDC_FullAccessRole')
        else:
            self._log_verbose('Role verification succeeded')

    def _verify_scopes(self, fqs_list, errors):
        self._log_verbose('OAuth client allowed scopes: ' + str(fqs_list))
        match = 0
        for fqs in fqs_list:
            if Utils.PSM_SCOPE in fqs or AccessTokenProvider.SCOPE in fqs:
                match += 1
        if match != 2:
            errors.append('Missing required OAuth scopes, client only has ' +
                          str(fqs_list))
        else:
            self._log_verbose('Scope verification succeeded')

    @staticmethod
    def _idcs_errors(response, action):
        Utils.handle_idcs_errors(
            response, action, ' Access token in the token file expired, ' +
            'or the token file was generated with incorrect scopes; it ' +
            'requires the Identity Domain Administrator role')

    class ANDC(object):
        def __init__(self, app_id, audience):
            self.app_id = app_id
            self.audience = audience

    class Client(object):
        def __init__(self, app_id, oauth_id, secret):
            self.app_id = app_id
            self.oauth_id = oauth_id
            self.secret = secret
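
Every IDCS call in this utility has the same shape: a SCIM endpoint under the
tenant URL, a URL-encoded filter, and a bearer token read from the downloaded
token file. A minimal sketch of that request pattern with a bare
requests.Session, assuming Utils.APP_ENDPOINT resolves to /admin/v1/Apps (as
the filters above suggest); the exact header set is an assumption, since the
real code builds its headers via Utils.scim_headers:

from requests import Session


def find_app_id(idcs_url, token, display_name):
    """Look up an IDCS App id by display name via a SCIM filter query."""
    session = Session()
    try:
        r = session.get(
            idcs_url + '/admin/v1/Apps',
            params={'filter': 'displayName eq "%s"' % display_name},
            headers={'Authorization': 'Bearer ' + token,
                     'Accept': 'application/json'},
            timeout=12)
        r.raise_for_status()
        # SCIM list responses carry matches under the "Resources" key
        resources = r.json().get('Resources', [])
        return resources[0]['id'] if resources else None
    finally:
        session.close()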
コード例 #22
0
class LegendasTVProvider(Provider):
    """LegendasTV Provider.

    :param str username: username.
    :param str password: password.
    """

    languages = {
        Language.fromlegendastv(l)
        for l in language_converters['legendastv'].codes
    }
    server_url = 'http://legendas.tv/'
    subtitle_class = LegendasTVSubtitle

    def __init__(self, username=None, password=None):

        # Provider needs UNRAR installed. If not available raise ConfigurationError
        try:
            rarfile.custom_check(rarfile.UNRAR_TOOL)
        except rarfile.RarExecError:
            raise ConfigurationError('UNRAR tool not available')

        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers[
            'User-Agent'] = 'Subliminal/%s' % __short_version__

        # login
        if self.username and self.password:
            logger.info('Logging in')
            data = {
                '_method': 'POST',
                'data[User][username]': self.username,
                'data[User][password]': self.password
            }
            r = self.session.post(self.server_url + 'login',
                                  data,
                                  allow_redirects=False,
                                  timeout=10)
            raise_for_status(r)

            soup = ParserBeautifulSoup(r.content, ['html.parser'])
            if soup.find('div', {'class': 'alert-error'},
                         string=re.compile(u'Usuário ou senha inválidos')):
                raise AuthenticationError(self.username)

            logger.debug('Logged in')
            self.logged_in = True

    def terminate(self):
        # logout
        if self.logged_in:
            logger.info('Logging out')
            r = self.session.get(self.server_url + 'users/logout',
                                 allow_redirects=False,
                                 timeout=10)
            raise_for_status(r)
            logger.debug('Logged out')
            self.logged_in = False

        self.session.close()

    @staticmethod
    def is_valid_title(title, title_id, sanitized_title, season, year):
        """Check if is a valid title."""
        sanitized_result = sanitize(title['title'])
        if sanitized_result != sanitized_title:
            logger.debug("Mismatched title, discarding title %d (%s)",
                         title_id, sanitized_result)
            return

        # episode type
        if season:
            # discard mismatches on type
            if title['type'] != 'episode':
                logger.debug(
                    "Mismatched 'episode' type, discarding title %d (%s)",
                    title_id, sanitized_result)
                return

            # discard mismatches on season
            if 'season' not in title or title['season'] != season:
                logger.debug('Mismatched season %s, discarding title %d (%s)',
                             title.get('season'), title_id, sanitized_result)
                return
        # movie type
        else:
            # discard mismatches on type
            if title['type'] != 'movie':
                logger.debug(
                    "Mismatched 'movie' type, discarding title %d (%s)",
                    title_id, sanitized_result)
                return

            # discard mismatches on year
            if year is not None and 'year' in title and title['year'] != year:
                logger.debug("Mismatched movie year, discarding title %d (%s)",
                             title_id, sanitized_result)
                return
        return True

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME,
                               should_cache_fn=lambda value: value)
    def search_titles(self, title, season, title_year):
        """Search for titles matching the `title`.

        For episodes, each season has its own title.

        :param str title: the title to search for.
        :param int season: season of the title.
        :param int title_year: year of the title.
        :return: found titles.
        :rtype: dict
        """
        titles = {}
        sanitized_titles = [sanitize(title)]
        ignore_characters = {'\'', '.'}
        if any(c in title for c in ignore_characters):
            sanitized_titles.append(
                sanitize(title, ignore_characters=ignore_characters))

        for sanitized_title in sanitized_titles:
            # make the query
            if season:
                logger.info('Searching episode title %r for season %r',
                            sanitized_title, season)
            else:
                logger.info('Searching movie title %r', sanitized_title)

            r = self.session.get(self.server_url +
                                 'legenda/sugestao/{}'.format(sanitized_title),
                                 timeout=10)
            raise_for_status(r)
            results = json.loads(r.text)

            # loop over results
            for result in results:
                source = result['_source']

                # extract id
                title_id = int(source['id_filme'])

                # extract type
                title = {'type': type_map[source['tipo']]}

                # extract title, year and country
                name, year, country = title_re.match(
                    source['dsc_nome']).groups()
                title['title'] = name

                # extract imdb_id
                if source['id_imdb'] != '0':
                    if not source['id_imdb'].startswith('tt'):
                        title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
                    else:
                        title['imdb_id'] = source['id_imdb']

                # extract season
                if title['type'] == 'episode':
                    if source['temporada'] and source['temporada'].isdigit():
                        title['season'] = int(source['temporada'])
                    else:
                        match = season_re.search(source['dsc_nome_br'])
                        if match:
                            title['season'] = int(match.group('season'))
                        else:
                            logger.debug(
                                'No season detected for title %d (%s)',
                                title_id, name)

                # extract year
                if year:
                    title['year'] = int(year)
                elif source['dsc_data_lancamento'] and source[
                        'dsc_data_lancamento'].isdigit():
                    # year is based on the season air date, hence the
                    # adjustment (e.g. a season 3 that aired in 2010 maps
                    # to a title year of 2008)
                    title['year'] = int(
                        source['dsc_data_lancamento']) - title.get(
                            'season', 1) + 1

                # add title only if is valid
                # Check against title without ignored chars
                if self.is_valid_title(title, title_id, sanitized_titles[0],
                                       season, title_year):
                    titles[title_id] = title

            logger.debug('Found %d titles', len(titles))

        return titles

    @region.cache_on_arguments(
        expiration_time=timedelta(minutes=15).total_seconds())
    def get_archives(self, title_id, language_code, title_type, season,
                     episode):
        """Get the archive list from a given `title_id`, `language_code`, `title_type`, `season` and `episode`.

        :param int title_id: title id.
        :param int language_code: language code.
        :param str title_type: episode or movie
        :param int season: season
        :param int episode: episode
        :return: the archives.
        :rtype: list of :class:`LegendasTVArchive`

        """
        archives = []
        page = 0
        while True:
            # get the archive page
            url = self.server_url + 'legenda/busca/-/{language}/-/{page}/{title}'.format(
                language=language_code, page=page, title=title_id)
            r = self.session.get(url)
            raise_for_status(r)

            # parse the results
            soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
            for archive_soup in soup.select(
                    'div.list_element > article > div > div.f_left'):
                # create archive
                archive = LegendasTVArchive(
                    archive_soup.a['href'].split('/')[2], archive_soup.a.text,
                    'pack' in archive_soup.parent['class'], 'destaque'
                    in archive_soup.parent['class'],
                    self.server_url + archive_soup.a['href'][1:])
                # clean name of path separators and pack flags
                clean_name = archive.name.replace('/', '-')
                if archive.pack and clean_name.startswith('(p)'):
                    clean_name = clean_name[3:]

                # guess from name
                guess = guessit(clean_name, {'type': title_type})

                # episode
                if season and episode:
                    # discard mismatches on episode in non-pack archives

                    # Guessit may return int for single episode or list for multi-episode
                    # Check if archive name has multiple episodes releases on it
                    if not archive.pack and 'episode' in guess:
                        wanted_episode = set(episode) if isinstance(
                            episode, list) else {episode}
                        archive_episode = set(guess['episode']) if isinstance(
                            guess['episode'], list) else {guess['episode']}

                        if not wanted_episode.intersection(archive_episode):
                            logger.debug(
                                'Mismatched episode %s, discarding archive: %s',
                                guess['episode'], clean_name)
                            continue

                # extract text containing downloads, rating and timestamp
                data_text = archive_soup.find('p', class_='data').text

                # match downloads
                archive.downloads = int(
                    downloads_re.search(data_text).group('downloads'))

                # match rating
                match = rating_re.search(data_text)
                if match:
                    archive.rating = int(match.group('rating'))

                # match timestamp and validate it
                time_data = {
                    k: int(v)
                    for k, v in timestamp_re.search(
                        data_text).groupdict().items()
                }
                archive.timestamp = pytz.timezone(
                    'America/Sao_Paulo').localize(datetime(**time_data))
                if archive.timestamp > datetime.utcnow().replace(
                        tzinfo=pytz.utc):
                    raise ProviderError('Archive timestamp is in the future')

                # add archive
                logger.info(
                    'Found archive for title %d and language %d at page %s: %s',
                    title_id, language_code, page, archive)
                archives.append(archive)

            # stop on last page
            if soup.find('a',
                         attrs={'class': 'load_more'},
                         string='carregar mais') is None:
                break

            # increment page count
            page += 1

        logger.debug('Found %d archives', len(archives))

        return archives

    def download_archive(self, archive):
        """Download an archive's :attr:`~LegendasTVArchive.content`.

        :param archive: the archive to download :attr:`~LegendasTVArchive.content` of.
        :type archive: :class:`LegendasTVArchive`

        """
        logger.info('Downloading archive %s', archive.id)
        r = self.session.get(self.server_url +
                             'downloadarquivo/{}'.format(archive.id))
        raise_for_status(r)

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive.content = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive.content = ZipFile(archive_stream)
        else:
            raise ValueError('Not a valid archive')

    def query(self, language, title, season=None, episode=None, year=None):
        # search for titles
        titles = self.search_titles(title, season, year)

        subtitles = []
        # iterate over titles
        for title_id, t in titles.items():

            logger.info('Getting archives for title %d and language %d',
                        title_id, language.legendastv)
            archives = self.get_archives(title_id, language.legendastv,
                                         t['type'], season, episode)
            if not archives:
                logger.info('No archives found for title %d and language %d',
                            title_id, language.legendastv)

            # iterate over title's archives
            for a in archives:

                # compute an expiration time based on the archive timestamp
                expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) -
                                   a.timestamp).total_seconds()

                # attempt to get the releases from the cache
                cache_key = releases_key.format(archive_id=a.id,
                                                archive_name=a.name)
                releases = region.get(cache_key,
                                      expiration_time=expiration_time)

                # the releases are not in cache or cache is expired
                if releases == NO_VALUE:
                    logger.info('Releases not found in cache')

                    # download archive
                    self.download_archive(a)

                    # extract the releases
                    releases = []
                    for name in a.content.namelist():
                        # discard the legendastv file
                        if name.startswith('Legendas.tv'):
                            continue

                        # discard hidden files
                        if os.path.split(name)[-1].startswith('.'):
                            continue

                        # discard non-subtitle files
                        if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                            continue

                        releases.append(name)

                    # cache the releases
                    region.set(cache_key, releases)

                # iterate over releases
                for r in releases:
                    subtitle = self.subtitle_class(language, t['type'],
                                                   t['title'], t.get('year'),
                                                   t.get('imdb_id'),
                                                   t.get('season'), a, r)
                    logger.debug('Found subtitle %r', subtitle)
                    subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
            season = video.season
            episode = video.episode
        else:
            titles = [video.title] + video.alternative_titles

        for title in titles:
            subtitles = [
                s for l in languages for s in self.query(
                    l, title, season=season, episode=episode, year=video.year)
            ]
            if subtitles:
                return subtitles

        return []

    def download_subtitle(self, subtitle):
        # download archive in case we previously hit the releases cache and didn't download it
        if subtitle.archive.content is None:
            self.download_archive(subtitle.archive)

        # extract subtitle's content
        subtitle.content = fix_line_ending(
            subtitle.archive.content.read(subtitle.name))
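
The caching scheme above is worth spelling out: for each archive, the allowed
cache age is the age of the archive itself, so a cached release list is only
honored if it was stored after the archive was last updated. A minimal sketch
of the same idea with dogpile.cache (in-memory backend, illustrative names,
naive UTC datetimes assumed, unlike the timezone-aware timestamps above):

from datetime import datetime

from dogpile.cache import make_region
from dogpile.cache.api import NO_VALUE

region = make_region().configure('dogpile.cache.memory')


def get_releases(archive_id, archive_timestamp, fetch):
    """Return cached releases, refetching if the entry predates the archive."""
    key = 'releases|{}'.format(archive_id)
    # an entry is stale once it is older than the archive itself
    max_age = (datetime.utcnow() - archive_timestamp).total_seconds()
    releases = region.get(key, expiration_time=max_age)
    if releases is NO_VALUE:
        releases = fetch(archive_id)
        region.set(key, releases)
    return releases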
コード例 #23
0
class Crawler:
    '''Blueprint for creating new crawlers'''
    def __init__(self):
        self._destroyed = False
        self.executor = futures.ThreadPoolExecutor(max_workers=3)

        # Initialize cloudscraper
        try:
            self.scraper = cloudscraper.create_scraper(browser={
                'platform': 'linux',
                'mobile': False
            })
        except Exception:
            logger.exception('Failed to initialize cloudscraper')
            self.scraper = Session()
        # end try

        # Must resolve these fields inside `read_novel_info`
        self.novel_title = 'N/A'
        self.novel_author = 'N/A'
        self.novel_cover = None
        self.is_rtl = False

        # Each item must contain these keys:
        # `id` - 1 based index of the volume
        # `title` - the volume title (can be ignored)
        self.volumes = []

        # Each item must contain these keys:
        # `id` - 1 based index of the chapter
        # `title` - the title name
        # `volume` - the volume id of this chapter
        # `volume_title` - the volume title (can be ignored)
        # `url` - the link where to download the chapter
        self.chapters = []

        # Other stuff - not necessary to resolve from the crawler instance.
        self.home_url = ''
        self.novel_url = ''
        self.last_visited_url = None

    # end def

    def destroy(self):
        self._destroyed = True
        self.volumes.clear()
        self.chapters.clear()
        self.scraper.close()
        self.executor.shutdown(False)

    # end def

    # ------------------------------------------------------------------------- #
    # Implement these methods
    # ------------------------------------------------------------------------- #

    @abstractmethod
    def initialize(self):
        pass

    # end def

    @abstractmethod
    def login(self, email, password):
        pass

    # end def

    @abstractmethod
    def logout(self):
        pass

    # end def

    @abstractmethod
    def search_novel(self, query):
        '''Gets a list of results matching the given query'''
        pass

    # end def

    @abstractmethod
    def read_novel_info(self):
        '''Get novel title, author, cover, etc.'''
        pass

    # end def

    @abstractmethod
    def download_chapter_body(self, chapter):
        '''Download the body of a single chapter and return it as clean HTML.'''
        pass

    # end def

    def get_chapter_index_of(self, url):
        '''Return the index of the chapter matching the given url, or 0'''
        url = (url or '').strip().strip('/')
        for chapter in self.chapters:
            if chapter['url'] == url:
                return chapter['id']
            # end if
        # end for
        return 0

    # end def

    # ------------------------------------------------------------------------- #
    # Helper methods to be used
    # ------------------------------------------------------------------------- #
    @property
    def headers(self):
        return self.scraper.headers.copy()

    # end def

    @property
    def cookies(self):
        return {x.name: x.value for x in self.scraper.cookies}

    # end def

    def absolute_url(self, url, page_url=None):
        url = (url or '').strip()
        if not page_url:
            page_url = self.last_visited_url
        # end if
        if not url:
            return None
        elif url.startswith('//'):
            return self.home_url.split(':')[0] + ':' + url
        elif url.find('//') >= 0:
            return url
        elif url.startswith('/'):
            return self.home_url + url[1:]
        elif page_url:
            return page_url.strip('/') + '/' + url
        else:
            return self.home_url + url
        # end if

    # end def
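
    # Illustrative behavior, assuming home_url = 'https://example.com/' and
    # last_visited_url = 'https://example.com/novel':
    #
    #   absolute_url('//cdn.example.com/x.jpg') -> 'https://cdn.example.com/x.jpg'
    #   absolute_url('https://other.site/page') -> 'https://other.site/page'
    #   absolute_url('/chapter-2')              -> 'https://example.com/chapter-2'
    #   absolute_url('chapter-2')               -> 'https://example.com/novel/chapter-2'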

    def is_relative_url(self, url):
        page = urlparse(self.novel_url)
        url = urlparse(url)
        return (page.hostname == url.hostname
                and url.path.startswith(page.path))

    # end def

    def get_response(self, url, **kargs):
        if self._destroyed:
            return None
        # end if
        kargs = kargs or dict()
        # kargs['verify'] = kargs.get('verify', False)
        kargs['timeout'] = kargs.get('timeout', 150)  # in seconds
        self.last_visited_url = url.strip('/')
        response = self.scraper.get(url, **kargs)
        response.encoding = 'utf-8'
        # NOTE: cookies from the response are merged into the scraper session
        # automatically; the `cookies` property above is a read-only snapshot.
        response.raise_for_status()
        return response

    # end def

    def submit_form(self, url, data=None, multipart=False, headers=None):
        '''Submit a form using a post request'''
        if self._destroyed:
            return None
        # end if

        # avoid mutable default arguments; copy before mutating
        headers = dict(headers or {})
        headers.update({
            'Content-Type':
            'multipart/form-data' if multipart else
            'application/x-www-form-urlencoded; charset=UTF-8',
        })

        response = self.scraper.post(url, data=data or {}, headers=headers)
        response.encoding = 'utf-8'
        # session cookies are persisted automatically by the scraper session
        response.raise_for_status()
        return response

    # end def

    def get_soup(self, *args, **kwargs):
        parser = kwargs.pop('parser', None)
        response = self.get_response(*args, **kwargs)
        return self.make_soup(response, parser)

    # end def

    def make_soup(self, response, parser=None):
        html = response.content.decode('utf-8', 'ignore')
        soup = BeautifulSoup(html, parser or 'lxml')
        if not soup.find('body'):
            raise ConnectionError('HTML document was not loaded properly')
        # end if
        return soup

    # end def

    def get_json(self, *args, **kargs):
        response = self.get_response(*args, **kargs)
        return response.json()

    # end def

    def download_cover(self, output_file):
        response = self.get_response(self.novel_cover)
        with open(output_file, 'wb') as f:
            f.write(response.content)
        # end with

    # end def

    # ------------------------------------------------------------------------- #

    blacklist_patterns = [
        r'^[\W\D]*(volume|chapter)[\W\D]+\d+[\W\D]*$',
    ]
    bad_tags = [
        'noscript', 'script', 'iframe', 'form', 'hr', 'img', 'ins', 'button',
        'input', 'amp-auto-ads', 'pirate'
    ]
    block_tags = ['h3', 'div', 'p']

    def is_blacklisted(self, text):
        if len(text.strip()) == 0:
            return True
        # end if
        for pattern in self.blacklist_patterns:
            if re.search(pattern, text, re.IGNORECASE):
                return True
            # end if
        # end for
        return False

    # end def

    def clean_contents(self, div):
        if not div:
            return div
        # end if
        div.attrs = {}
        for tag in div.find_all(True):
            if isinstance(tag, Comment):
                tag.extract()  # Remove comments
            elif tag.name == 'br':
                next_tag = getattr(tag, 'next_sibling')
                if next_tag and getattr(next_tag, 'name') == 'br':
                    tag.extract()
                # end if
            elif tag.name in self.bad_tags:
                tag.extract()  # Remove bad tags
            elif not tag.text.strip():
                tag.extract()  # Remove empty tags
            elif self.is_blacklisted(tag.text):
                tag.extract()  # Remove blacklisted contents
            elif hasattr(tag, 'attrs'):
                tag.attrs = {}  # Remove attributes
            # end if
        # end for
        return div

    # end def

    def extract_contents(self, tag, level=0):
        body = []
        if level == 0:
            self.clean_contents(tag)
        # end if

        for elem in tag.contents:
            if self.block_tags.count(elem.name):
                body += self.extract_contents(elem, level + 1)
                continue
            # end if
            text = ''
            if not elem.name:
                text = str(elem).strip()
            else:
                text = '<%s>%s</%s>'
                text = text % (elem.name, elem.text.strip(), elem.name)
            # end if
            if text:
                body.append(text)
            # end if
        # end for

        if level > 0:
            return body
        else:
            return [x for x in body if len(x.strip())]
        # end if

    # end def

    def cleanup_text(self, text):
        return re.sub(u'[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]',
                      '',
                      str(text),
                      flags=re.UNICODE)
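
Concrete crawlers subclass this blueprint, set the novel metadata fields and
fill in the abstract methods using the helpers above. A minimal hypothetical
subclass (the site, URL and CSS selectors are made up for illustration):

class ExampleCrawler(Crawler):

    def initialize(self):
        self.home_url = 'https://example.com/'

    def read_novel_info(self):
        soup = self.get_soup(self.novel_url)
        self.novel_title = soup.select_one('h1.title').text.strip()
        self.novel_author = 'N/A'
        self.volumes.append({'id': 1})
        for i, a in enumerate(soup.select('ul.chapter-list a')):
            self.chapters.append({
                'id': i + 1,
                'volume': 1,
                'title': a.text.strip(),
                'url': self.absolute_url(a['href']),
            })

    def download_chapter_body(self, chapter):
        soup = self.get_soup(chapter['url'])
        content = self.clean_contents(soup.select_one('div.chapter-content'))
        return str(content)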
コード例 #24
0
ファイル: ipa.py プロジェクト: jlaunonen/kompassi
class IPASession(object):
    # Public API
    def __init__(self, username, password, login=True):
        self.username = username
        self.password = password
        self.login_on_enter = login
        self.session = Session()
        self.session.headers = dict(IPA_HEADERS)

    def __enter__(self):
        if self.login_on_enter:
            self._login()

        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.session.close()

    def change_own_password(self, new_password):
        """
        Do not log in first; just use an unauthenticated session for this one.

        >>> with IPASession(username, old_password, login=False) as session:
        ...     session.change_own_password(new_password)
        """
        return self._send_form(
            IPA_CHANGE_PASSWORD_URL, user=self.username, old_password=self.password, new_password=new_password
        )

    def get_user_info(self, username=None):
        if username is None:
            username = self.username

        return self._json_rpc("user_show", username)["result"]["result"]

    def change_password_for_another_user(self, username, new_password):
        return self._json_rpc("passwd", username, new_password, IPA_OTHER_USER_PASSWORD_MAGICK)

    def add_user_to_group(self, username, groupname):
        return self._json_rpc("group_add_member", groupname, user=[username])

    def remove_user_from_group(self, username, groupname):
        return self._json_rpc("group_remove_member", groupname, user=[username])

    def create_user(self, username, first_name, surname, password):
        return self._json_rpc("user_add", username, givenname=first_name, sn=surname, userpassword=password)

    def create_group(self, group_name):
        try:
            return self._json_rpc("group_add", group_name, description=group_name)
        except IPAError as e:
            try:
                error, = e.args
                code = error["code"]
            except (KeyError, IndexError):
                # ipa connectivity error or something else, bad
                raise e
            else:
                if code == IPA_GROUP_ADD_ERROR_ALREADY_EXISTS:
                    # group already exists
                    # we are under "ensure exists" semantics so this is kosher
                    return None
                else:
                    # some other error
                    raise e

    # Internal implementation
    def _login(self):
        return self._send_form(IPA_LOGIN_URL, user=self.username, password=self.password)

    def _send_form(self, url, **payload):
        response = self.session.post(url, data=payload, verify=settings.KOMPASSI_IPA_CACERT_PATH)

        try:
            response.raise_for_status()
        except HTTPError as e:
            logger.exception("IPA login failed: %s", response.content)
            raise IPAError(e)

        return True

    def _json_rpc(self, method_name, *args, **kwargs):
        headers = {"Content-Type": "application/json", "Accept": "application/json"}

        payload = {"params": [args, kwargs], "method": method_name, "id": 0}

        response = self.session.post(
            IPA_JSONRPC_URL, data=json.dumps(payload), headers=headers, verify=settings.KOMPASSI_IPA_CACERT_PATH
        )

        try:
            response.raise_for_status()
        except HTTPError as e:
            logger.exception("IPA JSON-RPC call failed: %s", response.content)
            raise IPAError(e)

        result = response.json()

        error = result.get("error", None)
        if error:
            raise IPAError(error)

        return result
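
Typical usage pairs the context manager with the helpers above; for example
(user names and groups are illustrative, and the IPA_* constants and settings
come from the surrounding project):

with IPASession('alice', 's3cret') as session:
    info = session.get_user_info()
    session.add_user_to_group('alice', 'staff')

# password change uses an unauthenticated session, as the docstring shows
with IPASession('alice', 'old-pass', login=False) as session:
    session.change_own_password('new-pass')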
コード例 #25
0
ファイル: addic7ed.py プロジェクト: pannal/Sub-Zero.bundle
class Addic7edProvider(_Addic7edProvider):
    languages = {Language('por', 'BR')} | {Language(l) for l in [
        'ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'eus', 'fas', 'fin', 'fra', 'glg',
        'heb', 'hrv', 'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa', 'nld', 'nor', 'pol', 'por', 'ron', 'rus',
        'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha', 'tur', 'ukr', 'vie', 'zho'
    ]} | {Language.fromietf(l) for l in ["sr-Latn", "sr-Cyrl"]}

    USE_ADDICTED_RANDOM_AGENTS = False
    hearing_impaired_verifiable = True
    subtitle_class = Addic7edSubtitle
    server_url = 'https://www.addic7ed.com/'

    sanitize_characters = {'-', ':', '(', ')', '.', '/'}

    def __init__(self, username=None, password=None, use_random_agents=False):
        super(Addic7edProvider, self).__init__(username=username, password=password)
        self.USE_ADDICTED_RANDOM_AGENTS = use_random_agents

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__

        if self.USE_ADDICTED_RANDOM_AGENTS:
            from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
            logger.debug("Addic7ed: using random user agents")
            self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

        # login
        if self.username and self.password:
            def check_verification(cache_region):
                rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10,
                                      headers={"Referer": self.server_url})
                if rr.status_code == 302:
                    logger.info('Addic7ed: Login expired')
                    cache_region.delete("addic7ed_data")
                else:
                    logger.info('Addic7ed: Re-using old login')
                    self.logged_in = True
                    return True

            if load_verification("addic7ed", self.session, callback=check_verification):
                return

            logger.info('Addic7ed: Logging in')
            data = {'username': self.username, 'password': self.password, 'Submit': 'Log in', 'url': '',
                    'remember': 'true'}

            tries = 0
            while tries < 3:
                r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url})
                if "grecaptcha" in r.content:
                    logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
                                'happen once every so often')

                    match = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.content)
                    if not match:
                        logger.error("Addic7ed: Captcha site-key not found!")
                        return
                    site_key = match.group(1)

                    pitcher = pitchers.get_pitcher()("Addic7ed", self.server_url + 'login.php', site_key,
                                                     user_agent=self.session.headers["User-Agent"],
                                                     cookies=self.session.cookies.get_dict(),
                                                     is_invisible=True)

                    result = pitcher.throw()
                    if not result:
                        raise Exception("Addic7ed: Couldn't solve captcha!")

                    data["recaptcha_response"] = result

                r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
                                      headers={"Referer": self.server_url + "login.php"})

                if "relax, slow down" in r.content:
                    raise TooManyRequests(self.username)

                if r.status_code != 302:
                    if "User <b></b> doesn't exist" in r.content and tries <= 2:
                        logger.info("Addic7ed: Error, trying again. (%s/%s)", tries+1, 3)
                        tries += 1
                        continue

                    raise AuthenticationError(self.username)
                break

            store_verification("addic7ed", self.session)

            logger.debug('Addic7ed: Logged in')
            self.logged_in = True

    def terminate(self):
        self.session.close()

    def get_show_id(self, series, year=None, country_code=None):
        """Get the best matching show id for `series`, `year` and `country_code`.

        First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :param country_code: country code of the series, if any.
        :type country_code: str
        :return: the show id, if found.
        :rtype: int

        """
        series_sanitized = sanitize(series).lower()
        show_ids = self._get_show_ids()
        show_id = None

        # attempt with country
        if not show_id and country_code:
            logger.debug('Getting show id with country')
            show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))

        # attempt with year
        if not show_id and year:
            logger.debug('Getting show id with year')
            show_id = show_ids.get('%s %d' % (series_sanitized, year))

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            show_id = show_ids.get(series_sanitized)

        # search as last resort
        # broken right now
        # if not show_id:
        #     logger.warning('Series %s not found in show ids', series)
        #     show_id = self._search_show_id(series)

        return show_id

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows.php` page.
        :return: show id per series, lower case and without quotes.
        :rtype: dict

        # patch: add punctuation cleaning
        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + 'shows.php', timeout=10)
        r.raise_for_status()

        # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
        # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
        # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
        show_cells = re.findall(show_cells_re, r.content)
        if show_cells:
            soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
        else:
            # If RegEx fails, fall back to original r.content and use 'html.parser'
            soup = ParserBeautifulSoup(r.content, ['html.parser'])

        # populate the show ids
        show_ids = {}
        for show in soup.select('td > h3 > a[href^="/show/"]'):
            show_clean = sanitize(show.text, default_characters=self.sanitize_characters)
            try:
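                # href looks like "/show/1234"; skip the 6-character "/show/" prefix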
                show_id = int(show['href'][6:])
            except ValueError:
                continue

            show_ids[show_clean] = show_id
            match = series_year_re.match(show_clean)
            if match and match.group(2) and match.group(1) not in show_ids:
                # year found, also add it without year
                show_ids[match.group(1)] = show_id

        soup.decompose()
        soup = None

        logger.debug('Found %d show ids', len(show_ids))

        return show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: int

        """
        # addic7ed doesn't support search with quotes
        series = series.replace('\'', ' ')

        # build the params
        series_year = '%s %d' % (series, year) if year is not None else series
        params = {'search': series_year, 'Submit': 'Search'}

        # make the search
        logger.info('Searching show ids with %r', params)

        # currently addic7ed searches via srch.php from the front page, then a re-search is needed which calls
        # search.php
        for endpoint in ("srch.php", "search.php",):
            headers = None
            if endpoint == "search.php":
                headers = {
                    "referer": self.server_url + "srch.php"
                }
            r = self.session.get(self.server_url + endpoint, params=params, timeout=10, headers=headers)
            r.raise_for_status()

            if r.text and "Sorry, your search" not in r.text:
                break

            time.sleep(4)

        if r.status_code == 304:
            raise TooManyRequests()

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        suggestion = None

        # get the suggestion
        try:
            suggestion = soup.select('span.titulo > a[href^="/show/"]')
            if not suggestion:
                logger.warning('Show id not found: no suggestion')
                return None
            if not sanitize(suggestion[0].i.text.replace('\'', ' '),
                            default_characters=self.sanitize_characters) == \
                    sanitize(series_year, default_characters=self.sanitize_characters):
                logger.warning('Show id not found: suggestion does not match')
                return None
            show_id = int(suggestion[0]['href'][6:])
            logger.debug('Found show id %d', show_id)

            return show_id
        finally:
            soup.decompose()
            soup = None

    def query(self, show_id, series, season, year=None, country=None):
        # patch: fix logging

        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id, season)
        r = self.session.get(self.server_url + 'ajax_loadShow.php',
                             params={'show': show_id, 'season': season},
                             timeout=10,
                             headers={
                                 "referer": "%sshow/%s" % (self.server_url, show_id),
                                 "X-Requested-With": "XMLHttpRequest"
                             }
                             )

        r.raise_for_status()

        if r.status_code == 304:
            raise TooManyRequests()

        if not r.content:
            # the provider wrongfully returns 304 Not Modified with an empty
            # body; raise_for_status does not raise for that status code
            logger.error('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over subtitle rows
        subtitles = []
        for row in soup.select('tr.epeven'):
            cells = row('td')

            # ignore incomplete subtitles
            status = cells[5].text
            if status != 'Completed':
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # read the item
            language = Language.fromaddic7ed(cells[3].text)
            hearing_impaired = bool(cells[6].text)
            page_link = self.server_url + cells[2].a['href'][1:]
            season = int(cells[0].text)
            episode = int(cells[1].text)
            title = cells[2].text
            version = cells[4].text
            download_link = cells[9].a['href'][1:]

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title,
                                           year,
                                           version, download_link)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        soup.decompose()
        soup = None

        return subtitles

    def download_subtitle(self, subtitle):
        # download the subtitle
        r = self.session.get(self.server_url + subtitle.download_link, headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        if r.status_code == 304:
            raise TooManyRequests()

        if not r.content:
            # the provider wrongfully returns 304 Not Modified with an empty
            # body; raise_for_status does not raise for that status code
            logger.error('Unable to download subtitle. No data returned from provider')
            return

        # detect download limit exceeded
        if r.headers['Content-Type'] == 'text/html':
            raise DownloadLimitExceeded

        subtitle.content = fix_line_ending(r.content)
コード例 #26
0
ファイル: addic7ed.py プロジェクト: ribaaa/bazarr
class Addic7edProvider(_Addic7edProvider):
    languages = {Language('por', 'BR')} | {
        Language(l)
        for l in [
            'ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu',
            'ell', 'eng', 'eus', 'fas', 'fin', 'fra', 'glg', 'heb', 'hrv',
            'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa', 'nld',
            'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi',
            'srp', 'swe', 'tha', 'tur', 'ukr', 'vie', 'zho'
        ]
    } | {Language.fromietf(l)
         for l in ["sr-Latn", "sr-Cyrl"]}

    USE_ADDICTED_RANDOM_AGENTS = False
    hearing_impaired_verifiable = True
    subtitle_class = Addic7edSubtitle
    server_url = 'https://www.addic7ed.com/'

    sanitize_characters = {'-', ':', '(', ')', '.', '/'}
    last_show_ids_fetch_key = "addic7ed_last_id_fetch"

    def __init__(self, username=None, password=None, use_random_agents=False):
        super(Addic7edProvider, self).__init__(username=username,
                                               password=password)
        self.USE_ADDICTED_RANDOM_AGENTS = use_random_agents

        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

    def initialize(self):
        self.session = Session()
        self.session.headers[
            'User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__

        from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
        logger.debug("Addic7ed: using random user agents")
        self.session.headers['User-Agent'] = AGENT_LIST[randint(
            0,
            len(AGENT_LIST) - 1)]
        self.session.headers['Referer'] = self.server_url

        # login
        if self.username and self.password:

            def check_verification(cache_region):
                try:
                    rr = self.session.get(self.server_url + 'panel.php',
                                          allow_redirects=False,
                                          timeout=10,
                                          headers={"Referer": self.server_url})
                    if rr.status_code == 302:
                        logger.info('Addic7ed: Login expired')
                        cache_region.delete("addic7ed_data")
                    else:
                        logger.info('Addic7ed: Re-using old login')
                        self.logged_in = True
                        return True
                except (ConnectionError, ConnectTimeout) as e:
                    logger.debug(
                        "Addic7ed: There was a problem reaching the server: %s."
                        % e)
                    raise IPAddressBlocked(
                        "Addic7ed: Your IP is temporarily blocked.")

            if load_verification("addic7ed",
                                 self.session,
                                 callback=check_verification):
                return

            logger.info('Addic7ed: Logging in')
            data = {
                'username': self.username,
                'password': self.password,
                'Submit': 'Log in',
                'url': '',
                'remember': 'true'
            }

            tries = 0
            while tries <= 3:
                tries += 1
                r = self.session.get(self.server_url + 'login.php',
                                     timeout=10,
                                     headers={"Referer": self.server_url})
                if "g-recaptcha" in r.text or "grecaptcha" in r.text:
                    logger.info(
                        'Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
                        'happen once every so often')

                    for g, s in (("g-recaptcha-response",
                                  r'g-recaptcha.+?data-sitekey=\"(.+?)\"'),
                                 ("recaptcha_response",
                                  r'grecaptcha.execute\(\'(.+?)\',')):
                        site_key = re.search(s, r.text).group(1)
                        if site_key:
                            break
                    if not site_key:
                        logger.error("Addic7ed: Captcha site-key not found!")
                        return

                    pitcher = pitchers.get_pitcher()(
                        "Addic7ed",
                        self.server_url + 'login.php',
                        site_key,
                        user_agent=self.session.headers["User-Agent"],
                        cookies=self.session.cookies.get_dict(),
                        is_invisible=True)

                    result = pitcher.throw()
                    if not result:
                        if tries >= 3:
                            raise Exception(
                                "Addic7ed: Couldn't solve captcha!")
                        logger.info(
                            "Addic7ed: Couldn't solve captcha! Retrying")
                        continue

                    data[g] = result

                r = self.session.post(
                    self.server_url + 'dologin.php',
                    data,
                    allow_redirects=False,
                    timeout=10,
                    headers={"Referer": self.server_url + "login.php"})

                if "relax, slow down" in r.text:
                    raise TooManyRequests(self.username)

                if "Wrong password" in r.text or "doesn't exist" in r.text:
                    raise AuthenticationError(self.username)

                if r.status_code != 302:
                    if tries >= 3:
                        logger.error(
                            "Addic7ed: Something went wrong when logging in")
                        raise AuthenticationError(self.username)
                    logger.info(
                        "Addic7ed: Something went wrong when logging in; retrying"
                    )
                    continue
                break

            store_verification("addic7ed", self.session)

            logger.debug('Addic7ed: Logged in')
            self.logged_in = True

    def terminate(self):
        self.session.close()

    def get_show_id(self,
                    series,
                    year=None,
                    country_code=None,
                    ignore_cache=False):
        """Get the best matching show id for `series`, `year` and `country_code`.

        First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :param country_code: country code of the series, if any.
        :type country_code: str
        :return: the show id, if found.
        :rtype: int
        """
        show_id = None
        ids_to_look_for = {
            sanitize(series).lower(),
            sanitize(series.replace(".", "")).lower(),
            sanitize(series.replace("&", "and")).lower()
        }
        show_ids = self._get_show_ids()
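        # dogpile.cache's cache_on_arguments gives the decorated method a
        # .refresh() helper that bypasses the cached value and re-runs the
        # underlying function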
        if ignore_cache or not show_ids:
            show_ids = self._get_show_ids.refresh(self)

        logger.debug("Trying show ids: %s", ids_to_look_for)
        for series_sanitized in ids_to_look_for:
            # attempt with country
            if not show_id and country_code:
                logger.debug('Getting show id with country')
                show_id = show_ids.get(
                    '%s %s' % (series_sanitized, country_code.lower()))

            # attempt with year
            if not show_id and year:
                logger.debug('Getting show id with year')
                show_id = show_ids.get('%s %d' % (series_sanitized, year))

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = show_ids.get(series_sanitized)

                if not show_id:
                    now = datetime.datetime.now()
                    last_fetch = region.get(self.last_show_ids_fetch_key)

                    # re-fetch show ids once per day if any show ID not found
                    if not ignore_cache and last_fetch != NO_VALUE and last_fetch + datetime.timedelta(
                            days=1) < now:
                        logger.info("Show id not found; re-fetching show ids")
                        return self.get_show_id(series,
                                                year=year,
                                                country_code=country_code,
                                                ignore_cache=True)
                    logger.debug(
                        "Not refreshing show ids, as the last fetch has been too recent"
                    )

            # search as last resort
            # broken right now
            # if not show_id:
            #     logger.warning('Series %s not found in show ids', series)
            #     show_id = self._search_show_id(series)

        return show_id

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows.php` page.
        :return: show id per series, lower case and without quotes.
        :rtype: dict

        # patch: add punctuation cleaning
        """
        # get the show page
        logger.info('Getting show ids')
        region.set(self.last_show_ids_fetch_key, datetime.datetime.now())

        r = self.session.get(self.server_url + 'shows.php', timeout=10)
        r.raise_for_status()

        # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
        # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
        # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
        show_cells = re.findall(show_cells_re, r.content)
        if show_cells:
            soup = ParserBeautifulSoup(
                b''.join(show_cells).decode('utf-8', 'ignore'),
                ['lxml', 'html.parser'])
        else:
            # If RegEx fails, fall back to original r.text and use 'html.parser'
            soup = ParserBeautifulSoup(r.text, ['html.parser'])

        # populate the show ids
        show_ids = {}
        shows = soup.select('td > h3 > a[href^="/show/"]')
        for show in shows:
            show_clean = sanitize(show.text,
                                  default_characters=self.sanitize_characters)
            try:
                show_id = int(show['href'][6:])
            except ValueError:
                continue

            show_ids[show_clean] = show_id
            match = series_year_re.match(show_clean)
            if match and match.group(2) and match.group(1) not in show_ids:
                # year found, also add it without year
                show_ids[match.group(1)] = show_id

        soup.decompose()
        soup = None

        logger.debug('Found %d show ids', len(show_ids))

        if not show_ids:
            raise Exception("Addic7ed: No show IDs found!")

        return show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: int

        """
        # addic7ed doesn't support search with quotes
        series = series.replace('\'', ' ')

        # build the params
        series_year = '%s %d' % (series, year) if year is not None else series
        params = {'search': series_year, 'Submit': 'Search'}

        # make the search
        logger.info('Searching show ids with %r', params)

        # currently addic7ed searches via srch.php from the front page, then a re-search is needed which calls
        # search.php
        for endpoint in (
                "srch.php",
                "search.php",
        ):
            headers = None
            if endpoint == "search.php":
                headers = {"referer": self.server_url + "srch.php"}
            r = self.session.get(self.server_url + endpoint,
                                 params=params,
                                 timeout=10,
                                 headers=headers)
            r.raise_for_status()

            if r.text and "Sorry, your search" not in r.text:
                break

            time.sleep(4)

        if r.status_code == 304:
            raise TooManyRequests()

        soup = ParserBeautifulSoup(r.text, ['lxml', 'html.parser'])

        suggestion = None

        # get the suggestion
        try:
            suggestion = soup.select('span.titulo > a[href^="/show/"]')
            if not suggestion:
                logger.warning('Show id not found: no suggestion')
                return None
            if not sanitize(suggestion[0].i.text.replace('\'', ' '),
                            default_characters=self.sanitize_characters) == \
                    sanitize(series_year, default_characters=self.sanitize_characters):
                logger.warning('Show id not found: suggestion does not match')
                return None
            show_id = int(suggestion[0]['href'][6:])
            logger.debug('Found show id %d', show_id)

            return show_id
        finally:
            soup.decompose()
            soup = None

    def query(self, show_id, series, season, year=None, country=None):
        # patch: fix logging

        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id,
                    season)
        r = self.session.get(self.server_url + 'ajax_loadShow.php',
                             params={
                                 'show': show_id,
                                 'season': season
                             },
                             timeout=10,
                             headers={
                                 "referer":
                                 "%sshow/%s" % (self.server_url, show_id),
                                 "X-Requested-With":
                                 "XMLHttpRequest"
                             })

        r.raise_for_status()

        if r.status_code == 304:
            raise TooManyRequests()

        if not r.text:
            # the provider wrongfully returns 304 Not Modified with an empty
            # body; raise_for_status does not raise for that status code
            logger.error('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.text, ['lxml', 'html.parser'])

        # loop over subtitle rows
        subtitles = []
        for row in soup.select('tr.epeven'):
            cells = row('td')

            # ignore incomplete subtitles
            status = cells[5].text
            if "%" in status:
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # read the item
            language = Language.fromaddic7ed(cells[3].text)
            hearing_impaired = bool(cells[6].text)
            page_link = self.server_url + cells[2].a['href'][1:]
            season = int(cells[0].text)
            episode = int(cells[1].text)
            title = cells[2].text
            version = cells[4].text
            download_link = cells[9].a['href'][1:]

            subtitle = self.subtitle_class(language, hearing_impaired,
                                           page_link, series, season, episode,
                                           title, year, version, download_link)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        soup.decompose()
        soup = None

        return subtitles

    def download_subtitle(self, subtitle):
        # download the subtitle
        r = self.session.get(self.server_url + subtitle.download_link,
                             headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        if r.status_code == 304:
            raise TooManyRequests()

        if not r.text:
            # the provider wrongfully returns 304 Not Modified with an empty
            # body; raise_for_status does not raise for that status code
            logger.error(
                'Unable to download subtitle. No data returned from provider')
            return

        # detect download limit exceeded
        if r.headers['Content-Type'] == 'text/html':
            raise DownloadLimitExceeded

        subtitle.content = fix_line_ending(r.content)
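
A minimal driver sketch for the provider above (not from the original project; the credentials, series name and year are placeholder assumptions):

# Hypothetical usage of Addic7edProvider as defined above; it performs live
# requests against addic7ed.com, so treat it as illustrative only.
provider = Addic7edProvider(username='user', password='secret')
provider.initialize()
try:
    show_id = provider.get_show_id('Breaking Bad', year=2008)
    if show_id:
        for sub in provider.query(show_id, 'Breaking Bad', season=1):
            provider.download_subtitle(sub)
            break
finally:
    provider.terminate()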
コード例 #27
0
class SuperSubtitlesProvider(Provider, ProviderSubtitleArchiveMixin):
    """SuperSubtitles Provider."""
    languages = {Language('hun', 'HU')} | {Language(l) for l in ['hun', 'eng']}
    video_types = (Episode, Movie)
    # https://www.feliratok.info/?search=&soriSorszam=&nyelv=&sorozatnev=The+Flash+%282014%29&sid=3212&complexsearch=true&knyelv=0&evad=4&epizod1=1&cimke=0&minoseg=0&rlsr=0&tab=all
    server_url = 'https://www.feliratok.info/'
    subtitle_class = SuperSubtitlesSubtitle
    hearing_impaired_verifiable = False
    multi_result_throttle = 2  # seconds

    def initialize(self):
        self.session = Session()
        self.session.headers = {
            'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
        }

    def terminate(self):
        self.session.close()

    def get_language(self, text):
        if text == 'Magyar':
            return Language.fromsupersubtitles('hu')
        if text == 'Angol':
            return Language.fromsupersubtitles('en')
        return None

    def find_imdb_id(self, sub_id):
        """Find the IMDB id of a subtitle by scraping its details page."""

        url = self.server_url + "index.php?tipus=adatlap&azon=a_" + sub_id
        # url = https://www.feliratok.info/index.php?tipus=adatlap&azon=a_1518600916
        logger.info('Get IMDB id from URL %s', url)
        r = self.session.get(url, timeout=10).content

        soup = ParserBeautifulSoup(r, ['lxml'])
        links = soup.find_all("a")

        for value in links:
            if "imdb.com" in str(value):
                # <a alt="iMDB" href="http://www.imdb.com/title/tt2357547/" target="_blank"><img alt="iMDB" src="img/adatlap/imdb.png"/></a>
                imdb_id = re.findall(r'(?<=www\.imdb\.com/title/).*(?=/")',
                                     str(value))[0]
                return imdb_id

        return None

    def find_id(self, series, year, original_title):
        """
        We need to find the id of the series at the following url:
        https://www.feliratok.info/index.php?term=SERIESNAME&nyelv=0&action=autoname
        Where SERIESNAME is a searchable string.
        The result will be something like this:
        [{"name":"DC\u2019s Legends of Tomorrow (2016)","ID":"3725"},{"name":"Miles from Tomorrowland (2015)","ID":"3789"}
        ,{"name":"No Tomorrow (2016)","ID":"4179"}]

        """

        # Search for exact name
        url = self.server_url + "index.php?term=" + series + "&nyelv=0&action=autoname"
        # url = self.server_url + "index.php?term=" + "fla"+ "&nyelv=0&action=autoname"
        logger.info('Get series id from URL %s', url)
        r = self.session.get(url, timeout=10)

        # r is something like this:
        # [{"name":"DC\u2019s Legends of Tomorrow (2016)","ID":"3725"},{"name":"Miles from Tomorrowland (2015)","ID":"3789"}
        # ,{"name":"No Tomorrow (2016)","ID":"4179"}]

        results = r.json()

        # check all of the results:
        for result in results:
            try:
                # "name":"Miles from Tomorrowland (2015)","ID":"3789"
                result_year = re.findall(r"(?<=\()\d\d\d\d(?=\))",
                                         result['name'])[0]
            except IndexError:
                result_year = ""

            try:
                # "name":"Miles from Tomorrowland (2015)","ID":"3789"
                result_title = re.findall(r".*(?=\(\d\d\d\d\))",
                                          result['name'])[0]
                result_id = result['ID']
            except IndexError:
                continue

            result_title = result_title.strip().replace("�",
                                                        "").replace(" ", ".")

            guessable = result_title.strip() + ".s01e01." + result_year
            guess = guessit(guessable, {'type': "episode"})

            if sanitize(original_title) == sanitize(
                    guess['title']
            ) and year and guess['year'] and year == guess['year']:
                # Return the matching id
                return result_id

        return None

    def query(self, series, video=None):
        year = video.year
        subtitle = None
        if isinstance(video, Episode):
            series = video.series
            season = video.season
            episode = video.episode
            #seriesa = series.replace(' ', '+')

            # Get ID of series with original name
            series_id = self.find_id(series, year, series)
            if not series_id:
                # If not found, try without the apostrophe
                modified_series = series.replace(' ', '+').replace('\'', '')
                series_id = self.find_id(modified_series, year, series)
                if not series_id and modified_series:
                    # If still not found, try with the longest word in the series title
                    modified_series = modified_series.split('+')
                    modified_series = max(modified_series, key=len)
                    series_id = self.find_id(modified_series, year, series)

                    if not series_id:
                        return None

            # https://www.feliratok.info/index.php?search=&soriSorszam=&nyelv=&sorozatnev=&sid=2075&complexsearch=true&knyelv=0&evad=6&epizod1=16&cimke=0&minoseg=0&rlsr=0&tab=all
            url = self.server_url + "index.php?search=&soriSorszam=&nyelv=&sorozatnev=&sid=" + \
                  str(series_id) + "&complexsearch=true&knyelv=0&evad=" + str(season) + "&epizod1=" + str(
                episode) + "&cimke=0&minoseg=0&rlsr=0&tab=all"
            subtitle = self.process_subs(series, video, url)

            if not subtitle:
                # No Subtitle found. Maybe already archived to season pack
                url = self.server_url + "index.php?search=&soriSorszam=&nyelv=&sorozatnev=&sid=" + \
                      str(series_id) + "&complexsearch=true&knyelv=0&evad=" + str(
                    season) + "&epizod1=&evadpakk=on&cimke=0&minoseg=0&rlsr=0&tab=all"
                subtitle = self.process_subs(series, video, url)

        if isinstance(video, Movie):
            title = series.replace(" ", "+")

            # https://www.feliratok.info/index.php?search=The+Hitman%27s+BodyGuard&soriSorszam=&nyelv=&tab=film
            url = self.server_url + "index.php?search=" + title + "&soriSorszam=&nyelv=&tab=film"
            subtitle = self.process_subs(series, video, url)

        return subtitle

    def process_subs(self, series, video, url):

        subtitles = []

        logger.info('URL for subtitles %s', url)
        r = self.session.get(url, timeout=10).content

        soup = ParserBeautifulSoup(r, ['lxml'])
        tables = soup.find_all("table")
        tables = tables[0].find_all("tr")
        i = 0
        series_imdb_id = None
        for table in tables:
            if "vilagit" in str(table) and i > 1:
                try:
                    sub_hun_name = table.findAll("div", {"class": "magyar"})[0]
                    if isinstance(video, Episode):
                        if "vad)" not in str(sub_hun_name):
                            # <div class="magyar">A pletykaf�szek (3. �vad)</div>
                            sub_hun_name = re.findall(
                                r'(?<=<div class="magyar">).*(?= -)',
                                str(sub_hun_name))[0]
                        else:
                            # <div class="magyar">A holnap legend�i - 3x11</div>
                            sub_hun_name = re.findall(
                                r'(?<=<div class="magyar">).*(?= \()',
                                str(sub_hun_name))[0]
                    if isinstance(video, Movie):
                        sub_hun_name = re.findall(
                            r'(?<=<div class="magyar">).*(?=</div)',
                            str(sub_hun_name))[0]
                except IndexError:
                    sub_hun_name = ""

                asked_for_episode = None
                sub_season = None
                sub_episode = None
                sub_english = table.findAll("div", {"class": "eredeti"})
                if isinstance(video, Episode):
                    asked_for_episode = video.episode
                    if "Season" not in str(sub_english):
                        # [<div class="eredeti">Gossip Girl (Season 3) (DVDRip-REWARD)</div>]
                        sub_english_name = re.findall(
                            r'(?<=<div class="eredeti">).*?(?= -)',
                            str(sub_english))[0]
                        sub_season = int((re.findall(
                            r"(?<=- ).*?(?= - )",
                            str(sub_english))[0].split('x')[0]).strip())
                        sub_episode = int((re.findall(
                            r"(?<=- ).*?(?= - )",
                            str(sub_english))[0].split('x')[1]).strip())

                    else:
                        # [<div class="eredeti">DC's Legends of Tomorrow - 3x11 - Here I Go Again (HDTV-AFG, HDTV-RMX, 720p-SVA, 720p-PSA </div>]
                        sub_english_name = \
                            re.findall(r'(?<=<div class="eredeti">).*?(?=\(Season)', str(sub_english))[0]
                        sub_season = int(
                            re.findall(r"(?<=Season )\d+(?=\))",
                                       str(sub_english))[0])
                        sub_episode = int(video.episode)
                if isinstance(video, Movie):
                    sub_english_name = re.findall(
                        r'(?<=<div class="eredeti">).*?(?=\()',
                        str(sub_english))[0]

                sub_version = str(sub_english).split('(')[-1].split(')')[0]
                # <small>Angol</small>
                lang = table.findAll("small")[0]
                sub_language = self.get_language(
                    re.findall(r"(?<=<small>).*(?=</small>)", str(lang))[0])

                # <a href="/index.php?action=letolt&amp;fnev=DCs Legends of Tomorrow - 03x11 - Here I Go Again.SVA.English.C.orig.Addic7ed.com.srt&amp;felirat=1519162191">
                link = str(table.findAll("a")[len(table.findAll("a")) -
                                              1]).replace("amp;", "")
                sub_downloadlink = self.server_url + re.findall(
                    r'(?<=href="/).*(?=">)', link)[0]

                sub_id = re.findall(r"(?<=felirat\=).*(?=\"\>)", link)[0]
                sub_year = video.year
                sub_releases = [s.strip() for s in sub_version.split(',')]

                # For episodes we open the series page so all subtitles imdb_id must be the same. no need to check all
                if isinstance(video, Episode) and series_imdb_id is not None:
                    sub_imdb_id = series_imdb_id
                else:
                    sub_imdb_id = self.find_imdb_id(sub_id)
                    series_imdb_id = sub_imdb_id

                subtitle = SuperSubtitlesSubtitle(
                    sub_language,
                    sub_downloadlink,
                    sub_id,
                    sub_english_name.strip(),
                    sub_season,
                    sub_episode,
                    sub_version,
                    sub_releases,
                    sub_year,
                    sub_imdb_id,
                    asked_for_episode,
                    asked_for_release_group=video.release_group)
                subtitles.append(subtitle)
            i = i + 1
        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            titles = [video.series] + video.alternative_series
        elif isinstance(video, Movie):
            titles = [video.title] + video.alternative_titles

        for title in titles:
            subs = self.query(title, video=video)
            if subs:
                return subs

            time.sleep(self.multi_result_throttle)

        return []

    def download_subtitle(self, subtitle):

        # download as a zip
        logger.info('Downloading subtitle %r', subtitle.subtitle_id)
        r = self.session.get(subtitle.page_link, timeout=10)
        r.raise_for_status()

        if ".rar" in subtitle.page_link:
            logger.debug('Archive identified as rar')
            archive_stream = io.BytesIO(r.content)
            archive = RarFile(archive_stream)
            subtitle.content = self.get_subtitle_from_archive(
                subtitle, archive)
        elif ".zip" in subtitle.page_link:
            logger.debug('Archive identified as zip')
            archive_stream = io.BytesIO(r.content)
            archive = ZipFile(archive_stream)
            subtitle.content = self.get_subtitle_from_archive(
                subtitle, archive)
        else:
            subtitle.content = fix_line_ending(r.content)
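
A small lookup sketch for the helper methods above (standalone and hypothetical; the series name, year and a reachable feliratok.info are assumptions):

# Hypothetical usage of SuperSubtitlesProvider.find_id as defined above.
provider = SuperSubtitlesProvider()
provider.initialize()
try:
    # find_id() queries the autocomplete endpoint described in its docstring
    series_id = provider.find_id('The Flash', 2014, 'The Flash')
    print('series id:', series_id)
finally:
    provider.terminate()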
コード例 #28
0
class StoreAccessTokenProvider(AuthorizationProvider):
    """
    On-premise only.

    StoreAccessTokenProvider is an :py:class:`borneo.AuthorizationProvider` that
    performs the following functions:

        Initial (bootstrap) login to store, using credentials provided.\n
        Storage of bootstrap login token for re-use.\n
        Optionally renews the login token before it expires.\n
        Logs out of the store when closed.

    To access a store without security enabled, no parameters need to be passed
    to the constructor.

    To access a secure store, the constructor requires a valid user name and
    password for the target store. The user must already exist in the NoSQL
    Database and have sufficient permission to perform the table operations
    required by the application; that user's identity is used to authorize all
    database operations.

    :param user_name: the user name to use for the store. This user must exist
        in the NoSQL Database and is the identity that is used for authorizing
        all database operations.
    :type user_name: str
    :param password: the password for the user.
    :type password: str
    :raises IllegalArgumentException: raises the exception if one or more of the
        parameters is malformed or a required parameter is missing.
    """
    # Used when we send the user:password pair.
    _BASIC_PREFIX = 'Basic '
    # The general prefix for the login token.
    _BEARER_PREFIX = 'Bearer '
    # Login service end point name.
    _LOGIN_SERVICE = '/login'
    # Login token renew service end point name.
    _RENEW_SERVICE = '/renew'
    # Logout service end point name.
    _LOGOUT_SERVICE = '/logout'
    # Default timeout when sending http request to server
    _HTTP_TIMEOUT_MS = 30000

    def __init__(self, user_name=None, password=None):
        self._endpoint = None
        self._url = None
        self._auth_string = None
        self._auto_renew = True
        self._is_closed = False
        # The base path for security related services.
        self._base_path = HttpConstants.KV_SECURITY_PATH
        # The login token expiration time.
        self._expiration_time = 0
        self._logger = None
        self._logutils = LogUtils(self._logger)
        self._sess = Session()
        self._request_utils = borneo.http.RequestUtils(self._sess,
                                                       self._logutils)
        self._lock = Lock()
        self._timer = None
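        # The public "lock" attribute is presumably what the @synchronized
        # decorator acquires; "_lock" guards the auth-string swap in
        # _refresh_task.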
        self.lock = Lock()

        if user_name is None and password is None:
            # Used to access to a store without security enabled.
            self._is_secure = False
        else:
            if user_name is None or password is None:
                raise IllegalArgumentException('Invalid input arguments.')
            CheckValue.check_str(user_name, 'user_name')
            CheckValue.check_str(password, 'password')
            self._is_secure = True
            self._user_name = user_name
            self._password = password

    @synchronized
    def bootstrap_login(self):
        # Bootstrap login using the provided credentials.
        if not self._is_secure or self._is_closed:
            return
        # Convert the username:password pair in base 64 format.
        pair = self._user_name + ':' + self._password
        try:
            encoded_pair = b64encode(pair)
        except TypeError:
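            # Python 3: b64encode requires bytes, so encode the pair first and
            # decode the result back to str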
            encoded_pair = b64encode(pair.encode()).decode()
        try:
            # Send request to server for login token.
            response = self._send_request(
                StoreAccessTokenProvider._BASIC_PREFIX + encoded_pair,
                StoreAccessTokenProvider._LOGIN_SERVICE)
            content = response.get_content()
            # Login failed.
            if response.get_status_code() != codes.ok:
                raise InvalidAuthorizationException(
                    'Fail to login to service: ' + content)
            if self._is_closed:
                return
            # Generate the authentication string using login token.
            self._auth_string = (StoreAccessTokenProvider._BEARER_PREFIX +
                                 self._parse_json_result(content))
            # Schedule login token renew thread.
            self._schedule_refresh()
        except (ConnectionError, InvalidAuthorizationException) as e:
            self._logutils.log_debug(format_exc())
            raise e
        except Exception as e:
            self._logutils.log_debug(format_exc())
            raise NoSQLException('Bootstrap login fail.', e)

    @synchronized
    def close(self):
        """
        Close the provider, releasing resources such as a stored login token.
        """
        # Don't do anything for non-secure case.
        if not self._is_secure or self._is_closed:
            return
        # Send request for logout.
        try:
            response = self._send_request(
                self._auth_string, StoreAccessTokenProvider._LOGOUT_SERVICE)
            if response.get_status_code() != codes.ok:
                self._logutils.log_info('Failed to logout user ' +
                                        self._user_name + ': ' +
                                        response.get_content())
        except Exception as e:
            self._logutils.log_info('Failed to logout user ' +
                                    self._user_name + ': ' + str(e))

        # Clean up.
        self._is_closed = True
        self._auth_string = None
        self._expiration_time = 0
        self._password = None
        if self._timer is not None:
            self._timer.cancel()
            self._timer = None
        if self._sess is not None:
            self._sess.close()

    def get_authorization_string(self, request=None):
        if (request is not None
                and not isinstance(request, borneo.operations.Request)):
            raise IllegalArgumentException(
                'get_authorization_string requires an instance of Request or '
                + 'None as parameter.')
        if not self._is_secure or self._is_closed:
            return None
        # If there is no cached auth string, re-authenticate to retrieve the
        # login token and generate the auth string.
        if self._auth_string is None:
            self.bootstrap_login()
        return self._auth_string

    def is_secure(self):
        """
        Returns whether the provider is accessing a secured store.

        :returns: True if accessing a secure store, otherwise False.
        :rtype: bool
        """
        return self._is_secure

    def set_auto_renew(self, auto_renew):
        """
        Sets the auto-renew state. If True, automatic renewal of the login token
        is enabled.

        :param auto_renew: set to True to enable auto-renew.
        :type auto_renew: bool
        :returns: self.
        :raises IllegalArgumentException: raises the exception if auto_renew is
            not True or False.
        """
        CheckValue.check_boolean(auto_renew, 'auto_renew')
        self._auto_renew = auto_renew
        return self

    def is_auto_renew(self):
        """
        Returns whether the login token is to be automatically renewed.

        :returns: True if auto-renew is set, otherwise False.
        :rtype: bool
        """
        return self._auto_renew

    def set_endpoint(self, endpoint):
        """
        Sets the endpoint of the on-prem proxy.

        :param endpoint: the endpoint.
        :type endpoint: str
        :returns: self.
        :raises IllegalArgumentException: raises the exception if endpoint is
            not a string.
        """
        CheckValue.check_str(endpoint, 'endpoint')
        self._endpoint = endpoint
        self._url = borneo.config.NoSQLHandleConfig.create_url(endpoint, '')
        if self._is_secure and self._url.scheme.lower() != 'https':
            raise IllegalArgumentException(
                'StoreAccessTokenProvider requires use of https.')
        return self

    def get_endpoint(self):
        """
        Returns the endpoint of the on-prem proxy.

        :returns: the endpoint.
        :rtype: str
        """
        return self._endpoint

    def set_logger(self, logger):
        CheckValue.check_logger(logger, 'logger')
        self._logger = logger
        self._logutils = LogUtils(logger)
        return self

    def get_logger(self):
        return self._logger

    def set_ssl_context(self, ssl_ctx):
        # Internal use only
        adapter = SSLAdapter(ssl_ctx)
        self._sess.mount(self._url.scheme + '://', adapter)

    def set_url_for_test(self):
        self._url = urlparse(self._url.geturl().replace('https', 'http'))
        return self

    def validate_auth_string(self, auth_string):
        if self._is_secure and auth_string is None:
            raise IllegalArgumentException(
                'Secured StoreAccessProvider requires a non-none string.')

    def _parse_json_result(self, json_result):
        # Retrieve login token from JSON string.
        result = loads(json_result)
        # Extract expiration time from JSON result.
        self._expiration_time = result['expireAt']
        # Extract login token from JSON result.
        return result['token']

    def _refresh_task(self):
        """
        This task sends a request to the server for login session extension.
        Depending on the server policy, a new login token with new expiration
        time may or may not be granted.
        """
        if not self._is_secure or not self._auto_renew or self._is_closed:
            return
        try:
            old_auth = self._auth_string
            response = self._send_request(
                old_auth, StoreAccessTokenProvider._RENEW_SERVICE)
            token = self._parse_json_result(response.get_content())
            if response.get_status_code() != codes.ok:
                raise InvalidAuthorizationException(token)
            if self._is_closed:
                return
            with self._lock:
                if self._auth_string == old_auth:
                    self._auth_string = (
                        StoreAccessTokenProvider._BEARER_PREFIX + token)
            self._schedule_refresh()
        except Exception as e:
            self._logutils.log_info('Failed to renew login token: ' + str(e))
            if self._timer is not None:
                self._timer.cancel()
                self._timer = None

    def _schedule_refresh(self):
        # Schedule a login token renew when half of the token life time is
        # reached.
        if not self._is_secure or not self._auto_renew:
            return
        # Clean up any existing timer
        if self._timer is not None:
            self._timer.cancel()
            self._timer = None
        acquire_time = int(round(time() * 1000))
        if self._expiration_time <= 0:
            return
        # If we are within 10 seconds of expiration, skip the renew to avoid
        # firing too many renew requests in the last few seconds.
        if self._expiration_time > acquire_time + 10000:
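            # e.g. a token acquired now with 60 minutes of lifetime left gets
            # its renewal scheduled 30 minutes from now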
            renew_time = (acquire_time +
                          (self._expiration_time - acquire_time) // 2)
            self._timer = Timer(
                float(renew_time - acquire_time) / 1000, self._refresh_task)
            self._timer.start()

    def _send_request(self, auth_header, service_name):
        # Send HTTPS request to login/renew/logout service location with proper
        # authentication information.
        headers = {'Host': self._url.hostname, 'Authorization': auth_header}
        return self._request_utils.do_get_request(
            self._url.geturl() + self._base_path + service_name, headers,
            StoreAccessTokenProvider._HTTP_TIMEOUT_MS)
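
A minimal usage sketch for the provider above (the endpoint and credentials are placeholders; a secure on-prem proxy is assumed):

# Hypothetical usage of StoreAccessTokenProvider as defined above.
provider = StoreAccessTokenProvider(user_name='admin', password='secret')
provider.set_endpoint('https://proxy.example.com:443')
try:
    # get_authorization_string() triggers bootstrap_login() on first use and
    # returns a 'Bearer ...' header value
    auth_string = provider.get_authorization_string()
    print(auth_string is not None)
finally:
    provider.close()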
コード例 #29
0
class Icinga2Api(object):
    """
    Main class implementing the Icinga2 API client
    """
    module = None

    def __init__(self):
        """
          Initialize all needed Variables
        """
        self.icinga_host = module.params.get("host")
        self.icinga_port = module.params.get("port")
        self.icinga_username = module.params.get("username")
        self.icinga_password = module.params.get("password")
        self.state = module.params.get("state")
        self.hostname = module.params.get("hostname")
        self.hostnames = module.params.get("hostnames")
        self.start_time = module.params.get("start_time")
        self.end_time = module.params.get("end_time")
        self.duration = module.params.get("duration")
        self.object_type = module.params.get("object_type")
        self.all_services = module.params.get("all_services")
        self.author = module.params.get("author")
        self.comment = module.params.get("comment")
        self.fixed = module.params.get("fixed")
        self.filter_vars = None
        self.trigger_name = None

        self.icinga_url = "{0}:{1}/v1".format(self.icinga_host,
                                              self.icinga_port)

        self.connection = Session()
        self.connection.headers.update({'Accept': 'application/json'})
        self.connection.auth = (self.icinga_username, self.icinga_password)

        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def run(self):
        res = dict(changed=False, ansible_module_results="none")

        print("hostname  : {} ({})".format(self.hostname, type(self.hostname)))
        print("hostnames : {} ({})".format(self.hostnames,
                                           type(self.hostnames)))

        if self.hostname and self.hostnames:
            module.fail_json(msg=("Please choose whether to set downtimes for "
                                  "'hostname' or 'hostnames'. "
                                  "Both at the same time is not supported."))

        if len(self.hostnames) != 0:

            res['changed'] = True

            r = dict()

            if iter(self.hostnames):

                for h in self.hostnames:

                    r[h] = dict()

                    if self.__host_exists(h):
                        """

                        """
                        payload = {
                            'type': self.object_type,
                            'filter': "host.name == \"{}\"".format(h),
                            'author': self.author,
                            'comment': self.comment,
                            'start_time': self.start_time,
                            'end_time': self.end_time,
                            'duration': self.duration
                        }
                        if self.fixed:
                            payload.update(fixed=True)
                        else:
                            payload.update(fixed=False)

                        if self.filter_vars:
                            payload.update(filter_vars=self.filter_vars)

                        if self.trigger_name:
                            payload.update(trigger_name=self.trigger_name)

                        if self.object_type == 'Host' and self.all_services is True:
                            payload.update(all_services=True)

                        module.log(msg="downtime for: {}".format(h))
                        module.log(msg="payload: {}".format(payload))

                        code, msg = self.__schedule_downtime(payload)

                        module.log(msg="{}: {}".format(code, msg))

                        r[h] = dict(
                            msg=msg,
                            status_code=code,
                        )

                    else:
                        module.log(msg="404: host {} is not known".format(h))
                        r[h] = dict(
                            msg="host {} is not known".format(h),
                            status_code=404,
                        )

                res['result'] = r

        elif len(self.hostname) != 0:
            pass

        else:
            print("hoo")

#        print(res)
#        result = dict(changed=True,
#                      ansible_module_results="Downtimes removed",
#                      result=dict(req.json(), status_code=req.status_code))

        return res

    def __call_url(self, method='GET', path=None, data=None, headers=None):
        """

        """
        if headers is None:
            headers = {
                'Accept': 'application/json',
                'X-HTTP-Method-Override': method,
            }
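        # Icinga2's API honours X-HTTP-Method-Override, so a POST body (e.g. a
        # filter expression) can be evaluated with GET semantics.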

        url = "{0}/{1}".format(self.icinga_url, path)
        print(url)
        self.connection.headers.update(headers)

        try:
            if (method == 'GET'):
                ret = self.connection.get(url, verify=False)
                self.connection.close()

            elif (method == 'POST'):
                ret = self.connection.post(url, data=data, verify=False)
                self.connection.close()

            else:
                print("unsupported")

            ret.raise_for_status()

            # print("------------------------------------------------------------------")
            # print(" text    : {}".format(ret.text))
            # print(" headers : {}".format(ret.headers))
            # print(" code    : {}".format(ret.status_code))
            # print("------------------------------------------------------------------")

            return ret.status_code, json.loads(ret.text)

        except Exception as e:
            print(e)
            raise

    def __host_exists(self, hostname):
        """

        """
        code = 0

        data = {
            "type": "Host",
            "attrs": ["name"],
            "filter": "match(\"{0}\", host.name)".format(hostname),
        }

        code, ret = self.__call_url(method='POST',
                                    path="objects/hosts",
                                    data=module.jsonify(data),
                                    headers={
                                        'Accept': 'application/json',
                                        'X-HTTP-Method-Override': 'GET'
                                    })

        results = ret['results']

        if (code == 200 and len(results) != 0):
            # code   = results[0]['code']
            # status = results[0]['status']
            attrs = results[0]['attrs']

            if attrs.get('name') == hostname:
                return True

        return False

    def __schedule_downtime(self, data):
        """

        """
        code = 0
        status = "no status available"

        path = 'actions/schedule-downtime'

        code, ret = self.__call_url(method='POST',
                                    path=path,
                                    data=module.jsonify(data),
                                    headers={
                                        'Accept': 'application/json',
                                        'X-HTTP-Method-Override': 'POST'
                                    })

        results = ret['results']

        if (len(results) != 0):
            # print(json.dumps(results[0]))

            code = int(results[0]['code'])
            status = results[0]['status']

        return code, status
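
The X-HTTP-Method-Override pattern the class above relies on, sketched standalone (placeholder URL and credentials; TLS verification is disabled only to mirror the example):

# Hypothetical standalone sketch of a filtered Icinga2 query.
from requests import Session

session = Session()
session.auth = ('apiuser', 'secret')
session.headers.update({
    'Accept': 'application/json',
    'X-HTTP-Method-Override': 'GET',  # evaluate this POST as a filtered GET
})
response = session.post(
    'https://icinga.example.com:5665/v1/objects/hosts',
    json={'filter': 'match("web*", host.name)'},
    verify=False,
)
print(response.status_code, response.json())
session.close()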
コード例 #30
0
ファイル: titlovi.py プロジェクト: castro732/Sub-Zero.bundle
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
    server_url = 'https://titlovi.com'
    search_url = server_url + '/titlovi/?'
    download_url = server_url + '/download/?type=1&mediaid='

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' \
                                             '(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
        logger.debug('User-Agent set to %s', self.session.headers['User-Agent'])
        self.session.headers['Referer'] = self.server_url
        logger.debug('Referer set to %s', self.session.headers['Referer'])

    def terminate(self):
        self.session.close()

    def query(self, languages, title, season=None, episode=None, year=None, video=None):
        items_per_page = 10
        current_page = 1

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
            used_languages = [l for l in used_languages if l != Language.fromietf('sr-Latn')]
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url, params=params, timeout=10)
                r.raise_for_status()

                soup = BeautifulSoup(r.content, 'lxml')

                # number of results
                result_count = int(soup.select_one('.results_count b').string)
            except Exception:
                result_count = None

            # exit if no results
            if not result_count:
                if not subtitles:
                    logger.debug('No subtitles found')
                else:
                    logger.debug("No more subtitles found")
                break

            # number of pages with results
            pages = int(math.ceil(result_count / float(items_per_page)))

            # get current page
            if 'pg' in params:
                current_page = int(params['pg'])

            try:
                sublist = soup.select('section.titlovi > ul.titlovi > li')
                for sub in sublist:
                    # subtitle id
                    sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                    # get download link
                    download_link = self.download_url + sid
                    # title and alternate title
                    match = title_re.search(sub.a.string)
                    if match:
                        _title = match.group('title')
                        alt_title = match.group('altitle')
                    else:
                        continue

                    # page link
                    page_link = self.server_url + sub.a.attrs['href']
                    # subtitle language
                    match = lang_re.search(sub.select_one('.lang').attrs['src'])
                    if not match:
                        continue
                    try:
                        # decode language
                        lang = Language.fromtitlovi(match.group('lang') + match.group('script'))
                    except ValueError:
                        continue

                    # release year or series start year
                    r_year = None
                    match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                    if match:
                        r_year = int(match.group('year'))
                    # fps
                    fps = None
                    match = fps_re.search(sub.select_one('.fps').string)
                    if match:
                        fps = match.group('fps')
                    # releases
                    releases = str(sub.select_one('.fps').parent.contents[0].string)

                    # handle movies and series separately
                    if is_episode:
                        # season and episode info
                        sxe = sub.select_one('.s0xe0y').string
                        r_season = None
                        r_episode = None
                        if sxe:
                            match = season_re.search(sxe)
                            if match:
                                r_season = int(match.group('season'))
                            match = episode_re.search(sxe)
                            if match:
                                r_episode = int(match.group('episode'))

                        subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                       alt_title=alt_title, season=r_season, episode=r_episode,
                                                       year=r_year, fps=fps,
                                                       asked_for_release_group=video.release_group,
                                                       asked_for_episode=episode)
                    else:
                        subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                       alt_title=alt_title, year=r_year, fps=fps,
                                                       asked_for_release_group=video.release_group)
                    logger.debug('Found subtitle %r', subtitle)

                    # prime our matches so we can use the values later
                    subtitle.get_matches(video)

                    # add found subtitles
                    subtitles.append(subtitle)

            finally:
                soup.decompose()

            # stop on last page
            if current_page >= pages:
                break

            # increment current page
            params['pg'] = current_page + 1
            logger.debug('Getting page %d', params['pg'])

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [s for s in
                self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode, year=video.year,
                           video=video)]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            if not ('.cyr' in sub_name or '.cir' in sub_name):
                sr_lat_subs.append(sub_name)

            if ('.cyr' in sub_name or '.cir' in sub_name) and '.lat' not in sub_name:
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
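
Like the other subliminal-style providers in this listing, TitloviProvider is driven through an initialize/list_subtitles/terminate lifecycle. A hedged sketch of that flow; the video object is assumed to come from subliminal's scanner rather than being built here:

from babelfish import Language

provider = TitloviProvider()
provider.initialize()
try:
    # `video` is assumed to be a subliminal Video, e.g. from scan_video(path)
    subtitles = provider.list_subtitles(video, {Language.fromietf('sr-Latn')})
    for subtitle in subtitles:
        provider.download_subtitle(subtitle)
finally:
    provider.terminate()
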
Code example #31
0
class BSPlayerProvider(Provider):
    """BSPlayer Provider."""
    languages = {Language('por', 'BR')} | {
        Language(l)
        for l in [
            'ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra',
            'hun', 'ita', 'jpn', 'kor', 'nld', 'pol', 'por', 'ron', 'rus',
            'spa', 'swe', 'tur', 'ukr', 'zho'
        ]
    }
    SEARCH_THROTTLE = 8
    hash_verifiable = True

    # blatantly based on kodi's bsplayer plugin
    # also took from BSPlayer-Subtitles-Downloader
    def __init__(self):
        self.initialize()

    def initialize(self):
        self.session = Session()
        self.search_url = self.get_sub_domain()
        self.token = None
        self.login()

    def terminate(self):
        self.session.close()
        self.logout()

    def api_request(self, func_name='logIn', params='', tries=5):
        headers = {
            'User-Agent': 'BSPlayer/2.x (1022.12360)',
            'Content-Type': 'text/xml; charset=utf-8',
            'Connection': 'close',
            'SOAPAction': '"http://api.bsplayer-subtitles.com/v1.php#{func_name}"'.format(
                func_name=func_name)
        }
        data = (
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" '
            'xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" '
            'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
            'xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ns1="{search_url}">'
            '<SOAP-ENV:Body SOAP-ENV:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">'
            '<ns1:{func_name}>{params}</ns1:{func_name}></SOAP-ENV:Body></SOAP-ENV:Envelope>'
        ).format(search_url=self.search_url,
                 func_name=func_name,
                 params=params)
        logger.info('Sending request: %s.' % func_name)
        for i in range(tries):
            try:
                self.session.headers.update(headers)
                res = self.session.post(self.search_url, data)
                return ElementTree.fromstring(res.text)

            except Exception as ex:
                logger.info("ERROR: %s." % ex)
                if func_name == 'logIn':
                    self.search_url = self.get_sub_domain()

                sleep(1)
        logger.info('ERROR: Too many tries (%d)...' % tries)
        raise Exception('Too many tries...')

    def login(self):
        # If already logged in
        if self.token:
            return True

        root = self.api_request(func_name='logIn',
                                params=('<username></username>'
                                        '<password></password>'
                                        '<AppID>BSPlayer v2.67</AppID>'))
        res = root.find('.//return')
        if res.find('status').text == 'OK':
            self.token = res.find('data').text
            logger.info("Logged In Successfully.")
            return True
        return False

    def logout(self):
        # If already logged out / not logged in
        if not self.token:
            return True

        root = self.api_request(
            func_name='logOut',
            params='<handle>{token}</handle>'.format(token=self.token))
        res = root.find('.//return')
        self.token = None
        if res.find('status').text == 'OK':
            logger.info("Logged Out Successfully.")
            return True
        return False

    def query(self, video, video_hash, language):
        if not self.login():
            return []

        if isinstance(language, (tuple, list, set)):
            language_ids = ','.join(sorted(l.opensubtitles for l in language))
        else:
            language_ids = language.opensubtitles

        if video.imdb_id is None:
            imdbId = '*'
        else:
            imdbId = video.imdb_id
        sleep(self.SEARCH_THROTTLE)
        root = self.api_request(
            func_name='searchSubtitles',
            params=('<handle>{token}</handle>'
                    '<movieHash>{movie_hash}</movieHash>'
                    '<movieSize>{movie_size}</movieSize>'
                    '<languageId>{language_ids}</languageId>'
                    '<imdbId>{imdbId}</imdbId>').format(
                        token=self.token,
                        movie_hash=video_hash,
                        movie_size=video.size,
                        language_ids=language_ids,
                        imdbId=imdbId))
        res = root.find('.//return/result')
        if res.find('status').text != 'OK':
            return []

        items = root.findall('.//return/data/item')
        subtitles = []
        if items:
            logger.info("Subtitles Found.")
            for item in items:
                subID = item.find('subID').text
                subDownloadLink = item.find('subDownloadLink').text
                subLang = Language.fromopensubtitles(item.find('subLang').text)
                subName = item.find('subName').text
                subFormat = item.find('subFormat').text
                subtitles.append(
                    BSPlayerSubtitle(subLang, subName, subFormat, video,
                                     subDownloadLink))
        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(video, video.hashes['bsplayer'], languages)

    def get_sub_domain(self):
        # s1-9, s101-109
        SUB_DOMAINS = [
            's1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's101',
            's102', 's103', 's104', 's105', 's106', 's107', 's108', 's109'
        ]
        API_URL_TEMPLATE = "http://{sub_domain}.api.bsplayer-subtitles.com/v1.php"
        sub_domains_end = len(SUB_DOMAINS) - 1
        return API_URL_TEMPLATE.format(
            sub_domain=SUB_DOMAINS[random.randint(0, sub_domains_end)])

    def download_subtitle(self, subtitle):
        session = Session()
        _addheaders = {'User-Agent': 'Mozilla/4.0 (compatible; Synapse)'}
        session.headers.update(_addheaders)
        res = session.get(subtitle.page_link)
        if res:
            if res.text == '500':
                raise ValueError('Error 500 on server')

            with gzip.GzipFile(fileobj=io.BytesIO(res.content)) as gf:
                subtitle.content = gf.read()
                subtitle.normalize()

            return subtitle
        raise ValueError('Problems connecting to the server')
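
The query above feeds video.hashes['bsplayer'] into searchSubtitles. In the kodi plugin this provider credits, that value is the common OpenSubtitles-style 64-bit file hash: the file size plus the 64-bit sums of the first and last 64 KiB. A sketch of that computation, assuming BSPlayer uses exactly this scheme:

import os
import struct

def bsplayer_hash(path, chunk_size=64 * 1024):
    """OpenSubtitles-style hash: size + 64-bit sums of first/last 64 KiB."""
    filesize = os.path.getsize(path)
    if filesize < 2 * chunk_size:
        raise ValueError('file too small to hash')
    fmt = '<%dQ' % (chunk_size // 8)  # little-endian unsigned 64-bit words
    file_hash = filesize
    with open(path, 'rb') as f:
        file_hash += sum(struct.unpack(fmt, f.read(chunk_size)))
        f.seek(filesize - chunk_size)
        file_hash += sum(struct.unpack(fmt, f.read(chunk_size)))
    return '%016x' % (file_hash & 0xFFFFFFFFFFFFFFFF)
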
Code example #32
0
File: sessions.py Project: jdemaeyer/txrequests
 def close(self):
     requestsSession.close(self)
     if self.ownPool:
         self.pool.stop()
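
For context: txrequests subclasses requests.Session (imported as requestsSession) so that each request runs on a Twisted thread pool and returns a Deferred, and the close() override above also stops the pool when the session created it itself. A rough usage sketch; the exact constructor and callback API are recalled from the txrequests README and should be treated as an assumption:

from twisted.internet import reactor
from txrequests import Session

session = Session()  # no pool passed in, so ownPool is True

def on_response(response):
    print(response.status_code)
    session.close()  # closes the requests session and stops the owned pool
    reactor.stop()

session.get('http://www.example.com').addCallback(on_response)
reactor.run()
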
Code example #33
0
class BaseClient(object):
    def __init__(self, auth_id=None, auth_token=None, proxies=None, timeout=5):
        """
        The Plivo API client.

        Deals with all the API requests to be made.
        """

        self.base_uri = PLIVO_API_BASE_URI
        self.session = Session()
        self.session.headers.update({
            'User-Agent': get_user_agent(),
            'Content-Type': 'application/json',
            'Accept': 'application/json',
        })
        self.session.auth = fetch_credentials(auth_id, auth_token)
        self.multipart_session = Session()
        self.multipart_session.headers.update({
            'User-Agent': get_user_agent(),
            'Cache-Control': 'no-cache',
        })
        self.multipart_session.auth = fetch_credentials(auth_id, auth_token)
        self.proxies = proxies
        self.timeout = timeout

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.session.close()
        self.multipart_session.close()

    def process_response(self,
                         method,
                         response,
                         response_type=None,
                         objects_type=None):
        """Processes the API response based on the status codes and method used
        to access the API
        """

        try:
            response_json = response.json(
                object_hook=lambda x: ResponseObject(x) if isinstance(x, dict) else x)
            if response_type:
                r = response_type(self, response_json.__dict__)
                response_json = r

            if 'objects' in response_json and objects_type:
                response_json.objects = [
                    objects_type(self, obj.__dict__)
                    for obj in response_json.objects
                ]
        except ValueError:
            response_json = None

        if response.status_code == 400:
            if response_json and 'error' in response_json:
                raise ValidationError(response_json.error)
            raise ValidationError(
                'A parameter is missing or is invalid while accessing resource '
                'at: {url}'.format(url=response.url))

        if response.status_code == 401:
            if response_json and 'error' in response_json:
                raise AuthenticationError(response_json.error)
            raise AuthenticationError(
                'Failed to authenticate while accessing resource at: '
                '{url}'.format(url=response.url))

        if response.status_code == 404:
            if response_json and 'error' in response_json:
                raise ResourceNotFoundError(response_json.error)
            raise ResourceNotFoundError(
                'Resource not found at: {url}'.format(url=response.url))

        if response.status_code == 405:
            if response_json and 'error' in response_json:
                raise InvalidRequestError(response_json.error)
            raise InvalidRequestError(
                'HTTP method "{method}" not allowed to access resource at: '
                '{url}'.format(method=method, url=response.url))

        if response.status_code == 500:
            if response_json and 'error' in response_json:
                raise PlivoServerError(response_json.error)
            raise PlivoServerError(
                'A server error occurred while accessing resource at: '
                '{url}'.format(url=response.url))

        if method == 'DELETE':
            if response.status_code != 204:
                raise PlivoRestError('Resource at {url} could not be '
                                     'deleted'.format(url=response.url))

        elif response.status_code not in [200, 201, 202]:
            raise PlivoRestError(
                'Received status code {status_code} for the HTTP method '
                '"{method}"'.format(status_code=response.status_code,
                                    method=method))

        return response_json

    def create_request(self, method, path=None, data=None):
        path = path or []
        url = '/'.join([self.base_uri, self.session.auth[0]] +
                       [str(p) for p in path]) + '/'
        # GET payloads travel as query parameters, everything else as JSON
        if method == 'GET':
            req = Request(method, url, params=data)
        else:
            req = Request(method, url, json=data)
        return self.session.prepare_request(req)

    def create_multipart_request(self,
                                 method,
                                 path=None,
                                 data=None,
                                 files=None):
        path = path or []

        data_args = {}
        if method == 'GET':
            data_args['params'] = data
        else:
            data_args['data'] = data
            if files and 'file' in files and files['file'] != '':
                data_args['files'] = files

        req = Request(
            method,
            '/'.join([self.base_uri, self.multipart_session.auth[0]] +
                     [str(p) for p in path]) + '/',
            **data_args)
        return self.multipart_session.prepare_request(req)

    def send_request(self, request, **kwargs):
        session = kwargs.pop('session', self.session)

        return session.send(request,
                            proxies=self.proxies,
                            timeout=self.timeout,
                            **kwargs)

    def request(self,
                method,
                path=None,
                data=None,
                response_type=None,
                objects_type=None,
                files=None,
                **kwargs):
        if files is not None:
            req = self.create_multipart_request(method, path, data, files)
            session = self.multipart_session
        else:
            req = self.create_request(method, path, data)
            session = self.session
        kwargs['session'] = session
        res = self.send_request(req, **kwargs)
        return self.process_response(method, res, response_type, objects_type)
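
Because BaseClient implements __enter__ and __exit__, both underlying sessions can be closed deterministically with a with block. A hypothetical call; the credentials and the 'Message' resource path are placeholders, not taken from the snippet:

with BaseClient(auth_id='MA0123456789ABCDEF01', auth_token='secret') as client:
    # for GET, request() sends the payload as query parameters (see create_request)
    messages = client.request('GET', path=['Message'], data={'limit': 5})
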
Code example #34
0
File: addic7ed.py Project: Hydrog3n/SickRage
class Addic7edProvider(Provider):
    languages = {Language('por', 'BR')} | {Language(l) for l in [
        'ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'eus', 'fas', 'fin', 'fra', 'glg',
        'heb', 'hrv', 'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa', 'nld', 'nor', 'pol', 'por', 'ron', 'rus',
        'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha', 'tur', 'ukr', 'vie', 'zho'
    ]}
    video_types = (Episode,)
    server_url = 'http://www.addic7ed.com/'

    def __init__(self, username=None, password=None):
        if username is not None and password is None or username is None and password is not None:
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False

    def initialize(self):
        self.session = Session()
        self.session.headers = {'User-Agent': 'Subliminal/%s' % get_version(__version__)}

        # login
        if self.username is not None and self.password is not None:
            logger.info('Logging in')
            data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
            r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10)

            if r.status_code != 302:
                raise AuthenticationError(self.username)

            logger.debug('Logged in')
            self.logged_in = True

    def terminate(self):
        # logout
        if self.logged_in:
            logger.info('Logging out')
            r = self.session.get(self.server_url + 'logout.php', timeout=10)
            r.raise_for_status()
            logger.debug('Logged out')
            self.logged_in = False

        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows.php` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + 'shows.php', timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        for show in soup.select('td.version > h3 > a[href^="/show/"]'):
            show_ids[sanitize_string(show.text).lower()] = int(show['href'][6:])
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int or None
        :return: the show id, if found.
        :rtype: int or None

        """
        # build the params
        series_year = '%s %d' % (series, year) if year is not None else series
        params = {'search': sanitize_string(series_year, replacement=' '), 'Submit': 'Search'}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'search.php', params=params, timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # get the suggestion
        suggestion = soup.select('span.titulo > a[href^="/show/"]')
        if not suggestion:
            logger.warning('Show id not found: no suggestion')
            return None
        if not sanitized_string_equal(suggestion[0].i.text, series_year):
            logger.warning('Show id not found: suggestion does not match')
            return None
        show_id = int(suggestion[0]['href'][6:])
        logger.debug('Found show id %d', show_id)

        return show_id

    def get_show_id(self, series, year=None, country_code=None):
        """Get the best matching show id for `series`, `year` and `country_code`.

        First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int or None
        :param country_code: country code of the series, if any.
        :type country_code: str or None
        :return: the show id, if found.
        :rtype: int or None

        """
        series_sanitized = sanitize_string(series).lower()
        show_ids = self._get_show_ids()
        show_id = None

        # attempt with country
        if not show_id and country_code:
            logger.debug('Getting show id with country')
            show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))

        # attempt with year
        if not show_id and year:
            logger.debug('Getting show id with year')
            show_id = show_ids.get('%s %d' % (series_sanitized, year))

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            show_id = show_ids.get(series_sanitized)

        # search as last resort
        if not show_id:
            logger.warning('Series not found in show ids')
            show_id = self._search_show_id(series)

        return show_id

    def query(self, series, season, year=None, country=None):
        # get the show id
        show_id = self.get_show_id(series, year, country)
        if show_id is None:
            logger.error('No show id found for %r (%r)', series, {'year': year, 'country': country})
            return []

        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id, season)
        r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
        r.raise_for_status()
        if r.status_code == 304:
            raise TooManyRequests()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over subtitle rows
        match = series_year_re.match(soup.select('#header font')[0].text.strip()[:-10])
        series = match.group('series')
        year = int(match.group('year')) if match.group('year') else None
        subtitles = []
        for row in soup.select('tr.epeven'):
            cells = row('td')

            # ignore incomplete subtitles
            status = cells[5].text
            if status != 'Completed':
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # read the item
            language = Language.fromaddic7ed(cells[3].text)
            hearing_impaired = bool(cells[6].text)
            page_link = self.server_url + cells[2].a['href'][1:]
            season = int(cells[0].text)
            episode = int(cells[1].text)
            title = cells[2].text
            version = cells[4].text
            download_link = cells[9].a['href'][1:]

            subtitle = Addic7edSubtitle(language, hearing_impaired, page_link, series, season, episode, title, year,
                                        version, download_link)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        return [s for s in self.query(video.series, video.season, video.year)
                if s.language in languages and s.episode == video.episode]

    def download_subtitle(self, subtitle):
        # download the subtitle
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url + subtitle.download_link, headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        # detect download limit exceeded
        if r.headers['Content-Type'] == 'text/html':
            raise DownloadLimitExceeded

        subtitle.content = fix_line_ending(r.content)
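
The @region.cache_on_arguments decorators above come from dogpile.cache; the provider's module is expected to configure the region once at startup. A minimal configuration sketch, assuming an in-memory backend and an illustrative expiration value (subliminal lets the application choose both):

from dogpile.cache import make_region

SHOW_EXPIRATION_TIME = 3 * 7 * 24 * 60 * 60  # assumed value: three weeks in seconds

region = make_region().configure('dogpile.cache.memory')

@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def expensive_lookup(key):
    ...
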
Code example #35
0
class HosszupuskaProvider(Provider, ProviderSubtitleArchiveMixin):
    """Hosszupuska Provider."""
    languages = {Language('hun', 'HU')} | {Language(l) for l in [
        'hun', 'eng'
    ]}
    video_types = (Episode,)
    server_url = 'http://hosszupuskasub.com/'
    subtitle_class = HosszupuskaSubtitle
    hearing_impaired_verifiable = False
    multi_result_throttle = 2  # seconds

    def initialize(self):
        self.session = Session()
        self.session.headers = {'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")}

    def terminate(self):
        self.session.close()

    def get_language(self, text):
        if text == '1.gif':
            return Language.fromhosszupuska('hu')
        if text == '2.gif':
            return Language.fromhosszupuska('en')
        return None

    def query(self, series, season, episode, year=None, video=None):

        # Search for s01e03 instead of s1e3
        seasona = "%02d" % season
        episodea = "%02d" % episode
        series = fix_inconsistent_naming(series)
        seriesa = series.replace(' ', '+').replace('\'', '')

        # get the episode page
        logger.info('Getting the page for episode %s', episode)
        url = (self.server_url + "sorozatok.php?cim=" + seriesa + "&evad=" + str(seasona) +
               "&resz=" + str(episodea) + "&nyelvtipus=%25&x=24&y=8")
        logger.info('Url %s', url)

        r = self.session.get(url, timeout=10).content

        i = 0
        soup = ParserBeautifulSoup(r, ['lxml'])

        table = soup.find_all("table")[9]

        subtitles = []
        # loop over subtitles rows
        for row in table.find_all("tr"):
            i = i + 1
            if "this.style.backgroundImage='url(css/over2.jpg)" in str(row) and i > 5:
                datas = row.find_all("td")

                # Currently subliminal does not use these params, but they may come in handy later
                # hungarian_name = re.split('s(\d{1,2})', datas[1].find_all('b')[0].getText())[0]
                # Translator of subtitle
                # sub_translator = datas[3].getText()
                # Posting date of subtitle
                # sub_date = datas[4].getText()

                sub_year = sub_english_name = sub_version = None
                # Handle the case when '(' in subtitle
                if datas[1].getText().count('(') == 2:
                    sub_english_name = re.split('s(\d{1,2})e(\d{1,2})', datas[1].getText())[3]
                if datas[1].getText().count('(') == 3:
                    sub_year = re.findall(r"(?<=\()(\d{4})(?=\))", datas[1].getText().strip())[0]
                    sub_english_name = re.split('s(\d{1,2})e(\d{1,2})', datas[1].getText().split('(')[0])[0]

                if not sub_english_name:
                    continue

                sub_season = int((re.findall('s(\d{1,2})', datas[1].find_all('b')[0].getText(), re.VERBOSE)[0])
                                 .lstrip('0'))
                sub_episode = int((re.findall('e(\d{1,2})', datas[1].find_all('b')[0].getText(), re.VERBOSE)[0])
                                  .lstrip('0'))

                if sub_season == season and sub_episode == episode:
                    sub_language = self.get_language(datas[2].find_all('img')[0]['src'].split('/')[1])
                    sub_downloadlink = datas[6].find_all('a')[1]['href']
                    sub_id = sub_downloadlink.split('=')[1].split('.')[0]

                    if datas[1].getText().count('(') == 2:
                        sub_version = datas[1].getText().split('(')[1].split(')')[0]
                    if datas[1].getText().count('(') == 3:
                        sub_version = datas[1].getText().split('(')[2].split(')')[0]

                    # One subtitle can be used for several releases
                    sub_releases = [s.strip() for s in sub_version.split(',')]
                    subtitle = self.subtitle_class(sub_language, sub_downloadlink, sub_id, sub_english_name.strip(),
                                                   sub_season, sub_episode, sub_version, sub_releases, sub_year,
                                                   asked_for_release_group=video.release_group,
                                                   asked_for_episode=episode)

                    logger.debug('Found subtitle: %r', subtitle)
                    subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        titles = [video.series] + video.alternative_series

        for title in titles:
            subs = self.query(title, video.season, video.episode, video.year, video=video)
            if subs:
                return subs

            time.sleep(self.multi_result_throttle)

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.page_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            raise ProviderError('Unidentified archive type')

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
Code example #36
0
class TestDownloader(TestCase):

    def setUp(self):
        """  Log into the remote server. """

        self.url = 'http://bamboo-mec.de/ll.php5'

        test_dir = dirname(__file__)
        self.directory = join(test_dir, 'temp')

        credentials = {'username': '******',
                       'password': '******'}

        self.session = Session()
        headers = {'user-agent': 'Mozilla/5.0'}
        self.session.headers.update(headers)

        response = self.session.post(self.url, credentials)
        if response.ok:
            print('Now logged into remote server.')
        else:
            print('Failed to log in')
            exit(1)

    def tearDown(self):
        """  Logout. """

        # Say goodbye to the server
        url = 'http://bamboo-mec.de/index.php5'
        payload = {'logout': '1'}

        response = self.session.get(url, params=payload)

        if response.history[0].status_code == 302:
            # We have been redirected to the home page
            print('Logged out from remote server. Goodbye!')

        self.session.close()

        # Clean up the temp directory
        for file in listdir(self.directory):
            if search(self.day.strftime('%Y-%m-%d'), file):
                remove(join(self.directory, file))

    def testMiner(self):
        """ Check if the Miner class can download files correctly from the company server. """

        m = Miner(self.session,
                  self.directory,
                  overwrite=True)

        random_day = randint(1, 28)
        random_month = randint(1, 12)
        self.day = date(2014, random_month, random_day)

        print('Testing file download for %s.' % str(self.day))
        soups = m.mine(self.day)

        if not soups:
            # No jobs on that day... try again
            self.testMiner()
        else:
            for soup in soups:
                self.assertIsInstance(soup.data, BeautifulSoup)
                self.assertIsInstance(soup.stamp.date, date)
                self.assertIsInstance(soup.stamp.uuid, str)

                order_detail = soup.data.find(id='order_detail')
                self.assertIsNotNone(order_detail)
Code example #37
0
File: subtitulamos.py Project: youdroid/SickChill
class SubtitulamosProvider(Provider):
    """Subtitulamos Provider."""
    languages = {Language('por', 'BR')} | {
        Language(l)
        for l in ['cat', 'eng', 'glg', 'por', 'spa']
    }
    video_types = (Episode, )
    server_url = 'https://www.subtitulamos.tv/'
    search_url = server_url + 'search/query'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
        # self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 ' \
        #                                      'Firefox/56.0 '

    def terminate(self):
        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_url_titles(self, series, season, episode, year=None):
        """Search the URL titles by kind for the given `title`, `season` and `episode`.

        :param str series: series to search for.
        :param int season: season to search for.
        :param int episode: episode to search for.
        :param int year: year to search for.
        :return: the episode URL.
        :rtype: str

        """
        # make the search
        logger.info('Searching episode url for %s, season %d, episode %d',
                    series, season, episode)
        episode_url = None

        search = '{} {}x{}'.format(series, season, episode)
        r = self.session.get(self.search_url,
                             headers={'Referer': self.server_url},
                             params={'q': search},
                             timeout=10)
        r.raise_for_status()

        if r.status_code != 200:
            logger.error('Error getting episode url')
            raise ProviderError('Error getting episode url')

        results = json.loads(r.text)

        for result in results:
            title = sanitize(result['name'])

            # attempt series with year
            if sanitize('{} ({})'.format(series, year)) in title:
                for episode_data in result['episodes']:
                    if season == episode_data['season'] and episode == episode_data['number']:
                        return self.server_url + 'episodes/{}'.format(episode_data['id'])
            # attempt series without year
            elif sanitize(series) in title:
                for episode_data in result['episodes']:
                    if season == episode_data['season'] and episode == episode_data['number']:
                        return self.server_url + 'episodes/{}'.format(episode_data['id'])

        return episode_url

    def query(self, series, season, episode, year=None):
        # get the episode url
        episode_url = self._search_url_titles(series, season, episode, year)
        if episode_url is None:
            logger.error('No episode url found for %s, season %d, episode %d',
                         series, season, episode)
            return []

        r = self.session.get(episode_url,
                             headers={'Referer': self.server_url},
                             timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # get episode title
        title_pattern = re.compile('{}(.+){}x{:02d}- (.+)'.format(series, season, episode).lower())
        title = title_pattern.search(
            soup.select('#episode_title')[0].get_text().strip().lower()).group(2)

        subtitles = []
        for sub in soup.find_all('div', attrs={'id': 'progress_buttons_row'}):
            # read the language
            language = Language.fromsubtitulamos(
                sub.find_previous(
                    'div', class_='subtitle_language').get_text().strip())
            hearing_impaired = False

            # modify spanish latino subtitle language to only spanish and set hearing_impaired = True
            # because if exists spanish and spanish latino subtitle for the same episode, the score will be
            # higher with spanish subtitle. Spanish subtitle takes priority.
            if language == Language('spa', 'MX'):
                language = Language('spa')
                hearing_impaired = True

            # read the release subtitle
            release = sub.find_next('div',
                                    class_='version_name').get_text().strip()

            # ignore incomplete subtitles
            status = sub.find_next('div',
                                   class_='subtitle_buttons').contents[1]
            if status.name != 'a':
                logger.debug('Ignoring subtitle in [%s] not finished',
                             language)
                continue

            # read the subtitle url
            subtitle_url = self.server_url + status['href'][1:]
            subtitle = SubtitulamosSubtitle(language, hearing_impaired,
                                            episode_url, series, season,
                                            episode, title, year, release,
                                            subtitle_url)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        return [
            s for s in self.query(video.series, video.season, video.episode,
                                  video.year) if s.language in languages
        ]

    def download_subtitle(self, subtitle):
        # download the subtitle
        logger.info('Downloading subtitle %s', subtitle.download_link)
        r = self.session.get(subtitle.download_link,
                             headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        subtitle.content = fix_line_ending(r.content)
Code example #38
0
File: podnapisi.py Project: daanvb/Sub-Zero.bundle
class PodnapisiProvider(Provider):
    languages = ({Language('por', 'BR'), Language('srp', script='Latn')} |
                 {Language.fromalpha2(l) for l in language_converters['alpha2'].codes})
    video_types = (Episode, Movie)
    server_url = 'http://podnapisi.net/subtitles/'

    def initialize(self):
        self.session = Session()
        self.session.headers = {'User-Agent': 'Subliminal/%s' % get_version(__version__)}

    def terminate(self):
        self.session.close()

    def query(self, language, keyword, season=None, episode=None, year=None):
        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {'sXML': 1, 'sL': str(language), 'sK': keyword}
        is_episode = False
        if season and episode:
            is_episode = True
            params['sTS'] = season
            params['sTE'] = episode
        if year:
            params['sY'] = year

        # loop over paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []
        pids = set()
        while True:
            # query the server
            xml = etree.fromstring(self.session.get(self.server_url + 'search/old', params=params, timeout=10).content)

            # exit if no results
            if not int(xml.find('pagination/results').text):
                logger.debug('No subtitles found')
                break

            # loop over subtitles
            for subtitle_xml in xml.findall('subtitle'):
                # read xml elements
                language = Language.fromietf(subtitle_xml.find('language').text)
                hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
                page_link = subtitle_xml.find('url').text
                pid = subtitle_xml.find('pid').text
                releases = []
                if subtitle_xml.find('release').text:
                    for release in subtitle_xml.find('release').text.split():
                        releases.append(re.sub(r'\.+$', '', release))  # remove trailing dots
                title = subtitle_xml.find('title').text
                season = int(subtitle_xml.find('tvSeason').text)
                episode = int(subtitle_xml.find('tvEpisode').text)
                year = int(subtitle_xml.find('year').text)

                if is_episode:
                    subtitle = PodnapisiSubtitle(language, hearing_impaired, page_link, pid, releases, title,
                                                 season=season, episode=episode, year=year)
                else:
                    subtitle = PodnapisiSubtitle(language, hearing_impaired, page_link, pid, releases, title,
                                                 year=year)

                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in pids:
                    continue

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)
                pids.add(pid)

            # stop on last page
            if int(xml.find('pagination/current').text) >= int(xml.find('pagination/count').text):
                break

            # increment current page
            params['page'] = int(xml.find('pagination/current').text) + 1
            logger.debug('Getting page %d', params['page'])

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            return [s for l in languages for s in self.query(l, video.series, season=video.season,
                                                             episode=video.episode, year=video.year)]
        elif isinstance(video, Movie):
            return [s for l in languages for s in self.query(l, video.title, year=video.year)]

    def download_subtitle(self, subtitle):
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url + subtitle.pid + '/download', params={'container': 'zip'}, timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            if len(zf.namelist()) > 1:
                raise ProviderError('More than one file to unzip')

            subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))
Code example #39
0
File: betaseries.py Project: Mexx62/bazarr
class BetaSeriesProvider(Provider):
    """BetaSeries Provider"""
    languages = {Language(l) for l in ['fra', 'eng']}

    def __init__(self, token=None):
        if not token:
            raise ConfigurationError('Token must be specified')
        self.token = token

    def initialize(self):
        self.session = Session()
        self.session.headers = {
            'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
        }

    def terminate(self):
        self.session.close()

    def query(self, languages, video):
        # query the server
        if isinstance(video, Movie):
            logger.error(
                'It\'s not possible to search for a movie subtitle on BetaSeries'
            )
            return []
        elif isinstance(video, Episode):
            result = None
            if video.tvdb_id:
                params = {
                    'key': self.token,
                    'thetvdb_id': video.tvdb_id,
                    'v': 3.0,
                    'subtitles': 1
                }
                logger.debug('Searching subtitles %r', params)
                res = self.session.get(server_url + 'episodes/display',
                                       params=params,
                                       timeout=10)
                res.raise_for_status()
                result = res.json()
            elif video.series_tvdb_id:
                params = {
                    'key': self.token,
                    'thetvdb_id': video.series_tvdb_id,
                    'season': video.season,
                    'episode': video.episode,
                    'subtitles': 1,
                    'v': 3.0
                }
                logger.debug('Searching subtitles %r', params)
                res = self.session.get(server_url + 'shows/episodes',
                                       params=params,
                                       timeout=10)
                res.raise_for_status()
                result = res.json()

        if result is None:
            return []

        if result['errors']:
            logger.debug('Status error: %r', result['errors'])
            return []

        # parse the subtitles
        subtitles = []
        if 'episode' in result:
            subs = result['episode']['subtitles']
        elif 'episodes' in result:
            subs = result['episodes'][0]['subtitles']
        else:
            return []

        for sub in subs:
            language = _translateLanguageCodeToLanguage(sub['language'])
            if language in languages:
                # Filter out the seriessub source: the site shut down, so its links all 404
                if sub['source'] != 'seriessub':
                    subtitles.append(
                        BetaSeriesSubtitle(sub['id'], language, sub['file'],
                                           sub['url']))

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(languages, video)

    def download_subtitle(self, subtitle):
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        archive = _get_archive(r.content)
        subtitle_content = _get_subtitle_from_archive(
            archive) if archive else r.content

        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not extract subtitle from %r', archive)
Code example #40
0
File: legendastv.py Project: Comptezero/SickRage
class LegendasTvProvider(Provider):
    languages = {Language.fromlegendastv(l) for l in language_converters['legendastv'].codes}
    video_types = (Episode, Movie)
    server_url = 'http://legendas.tv'

    def __init__(self, username=None, password=None):
        if username is not None and password is None or username is None and password is not None:
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False

    def initialize(self):
        self.session = Session()

        # login
        if self.username is not None and self.password is not None:
            logger.info('Logging in')
            data = {'_method': 'POST', 'data[User][username]': self.username, 'data[User][password]': self.password}
            r = self.session.post('%s/login' % self.server_url, data, allow_redirects=False, timeout=TIMEOUT)
            r.raise_for_status()

            soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
            auth_error = soup.find('div', {'class': 'alert-error'}, text=re.compile(u'.*Usuário ou senha inválidos.*'))

            if auth_error:
                raise AuthenticationError(self.username)

            logger.debug('Logged in')
            self.logged_in = True

    def terminate(self):
        # logout
        if self.logged_in:
            logger.info('Logging out')
            r = self.session.get('%s/users/logout' % self.server_url, timeout=TIMEOUT)
            r.raise_for_status()
            logger.debug('Logged out')
            self.logged_in = False

        self.session.close()

    def matches(self, actual_properties, expected_title, expected_season=None, expected_episode=None,
                expected_year=None, ignore_episode=False):
        """
        Matches the `actual_properties` against the expected parameters. The `actual_properties` keys follow the
        guessit properties names.
        For movies:
          - `type` should match
          - `title` should match
          - `year` should match, unless they're not defined and expected and actual `title`s are equal
        For episodes:
          - `type` should match
          - `series` should match
          - `season` should match
          - `episode` should match, unless `ignore_episode` is True

        :param dict actual_properties: dictionary that contains the actual values following guessit property names.
        :param str expected_title: the expected movie/series title.
        :param int expected_season: the expected series season number.
        :param int expected_episode: the expected series episode number.
        :param int expected_year: the expected movie/series year.
        :param bool ignore_episode: `True` if episode matching should be ignored. Default: `False`.
        :return: Whether actual matches expected.
        :rtype: bool

        """
        expected_type = 'episode' if expected_season else 'movie'
        if expected_type != actual_properties.get('type'):
            return False

        s_actual_title = sanitize(actual_properties.get('title'))
        s_expected_title = sanitize(expected_title)

        if not s_actual_title or not s_expected_title or s_expected_title not in s_actual_title:
            return False

        if expected_type == 'movie':
            if expected_year != actual_properties.get('year'):
                if expected_year and actual_properties.get('year'):
                    return False
                if s_expected_title != s_actual_title:
                    return False

        elif expected_type == 'episode':
            if expected_season != actual_properties.get('season'):
                return False
            if not ignore_episode and expected_episode != actual_properties.get('episode'):
                return False

        return True
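
    # A worked example with hypothetical values: for an episode candidate whose
    # guessit-style properties are
    #     {'type': 'episode', 'title': 'Breaking Bad', 'season': 4, 'episode': 1}
    # matches(props, 'Breaking Bad', expected_season=4, expected_episode=1)
    # returns True, while expected_season=5 would fail the season check.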

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def search_candidates(self, title, season, episode, year):
        """
        Returns candidates for shows or movies by querying `/legenda/sugestao` page.
        Since the result is a list of candidates (movies, series, etc) an additional filtering is required.
        The properties type, name, year and season are used to filter out bad suggestions.

        :param str title: the movie/series title.
        :param int season: the series season number.
        :param int episode: the series episode number.
        :param int year: the movie/series year.
        :return: the candidates for shows or movies.
        :rtype: list of dict

        """
        results = dict()
        for keyword in {sanitize(title), title.lower().replace(':', '')}:
            logger.info('Searching candidates using the keyword %s', keyword)
            r = self.session.get('%s/legenda/sugestao/%s' % (self.server_url, keyword), timeout=TIMEOUT)
            r.raise_for_status()
            results.update({item['_id']: item for item in json.loads(r.text)})

        # get the shows/movies out of the suggestions.
        # json sample:
        # [
        #    {
        #        "_index": "filmes",
        #        "_type": "filme",
        #        "_id": "24551",
        #        "_score": null,
        #        "_source": {
        #            "id_filme": "24551",
        #            "id_imdb": "903747",
        #            "tipo": "S",
        #            "int_genero": "1036",
        #            "dsc_imagen": "tt903747.jpg",
        #            "dsc_nome": "Breaking Bad",
        #            "dsc_sinopse": "Dos mesmos criadores de Arquivo X, mas n\u00e3o tem nada de sobrenatural nesta
        #                            s\u00e9rie. A express\u00e3o \"breaking bad\" \u00e9 usada quando uma coisa que
        #                            j\u00e1 estava ruim, fica ainda pior. E \u00e9 exatamente isso que acontece com
        #                            Walter White, um professor de qu\u00edmica, que vivia sua vida \"tranquilamente\"
        #                            quando, boom, um diagn\u00f3stico terminal muda tudo. O liberta. Ele come\u00e7a a
        #                            usar suas habilidades em qu\u00edmica de outra forma: montando um laborat\u00f3rio
        #                            de drogas para financiar o futuro de sua fam\u00edlia.",
        #            "dsc_data_lancamento": "2011",
        #            "dsc_url_imdb": "http:\/\/www.imdb.com\/title\/tt0903747\/",
        #            "dsc_nome_br": "Breaking Bad - 4\u00aa Temporada",
        #            "soundex": null,
        #            "temporada": "4",
        #            "id_usuario": "241436",
        #            "flg_liberado": "0",
        #            "dsc_data_liberacao": null,
        #            "dsc_data": "2011-06-12T21:06:42",
        #            "dsc_metaphone_us": "BRKNKBT0SSN",
        #            "dsc_metaphone_br": "BRKNKBTTMPRT",
        #            "episodios": null,
        #            "flg_seriado": null,
        #            "last_used": "1372569074",
        #            "deleted": false
        #        },
        #        "sort": [
        #            "4"
        #        ]
        #    }
        # ]
        #
        # Notes:
        #  tipo: defines whether the entry is a movie or a TV show (possibly also a collection)
        #  id_imdb: sometimes appears as a bare number and sometimes as a string prefixed with 'tt'
        #  temporada: sometimes ``null``; the season information must then be extracted from dsc_nome_br

        # type, title, series, season, year: should follow guessit properties names
        mapping = dict(
            id='id_filme',
            type='tipo',
            title='dsc_nome',
            series='dsc_nome',
            season='temporada',
            year='dsc_data_lancamento',
            title_br='dsc_nome_br',
            imdb_id='id_imdb'
        )

        # movie, episode: should follow guessit type values
        type_map = {
            'M': 'movie',
            'S': 'episode',
            'C': 'episode'  # Considering C as episode. Probably C stands for Collections
        }

        # Regex to extract the season number. e.g.: 3\u00aa Temporada, 1a Temporada, 2nd Season
        season_re = re.compile(r'.*? - (\d{1,2}).*?((emporada)|(season))', re.IGNORECASE)

        # Regex to extract the IMDB id. e.g.: tt02342
        imdb_re = re.compile(r't{0,2}(\d+)')

        candidates = []
        for result in results.values():
            entry = result['_source']
            item = {k: entry.get(v) for k, v in mapping.items()}
            item['type'] = type_map.get(item.get('type'), 'movie')
            imdb_match = imdb_re.search(item.get('imdb_id'))
            item['imdb_id'] = imdb_match.group(1) if imdb_match else None

            # Season information might be missing and it should be extracted from 'title_br'
            if not item.get('season') and item.get('title_br'):
                season_match = season_re.search(item.get('title_br'))
                item['season'] = season_match.group(1) if season_match else None

            # Some string fields are actually integers
            for field in ['season', 'year', 'imdb_id']:
                field_text = item.get(field)
                item[field] = int(field_text) if field_text and field_text.isdigit() else None

            # ignoring episode match since this first step is only about movie/season information
            if self.matches(item, title, expected_season=season, expected_episode=episode, expected_year=year,
                            ignore_episode=True):
                candidates.append(dict(item))

        logger.debug('Candidates found: %s', candidates)
        return candidates

    def query(self, language, title, season=None, episode=None, year=None):
        """
        Returns a list of subtitles based on the input parameters.
          - 1st step: initial lookup for the movie/show information (see `search_candidates`)
          - 2nd step: list all candidate movies/shows from the previous step
          - 3rd step: reject candidates that don't match the input parameters (wrong season, wrong episode, etc.)
          - 4th step: download all subtitles to inspect the 'release name'
          - 5th step: create a subtitle for each release

        :param language: the requested language
        :param str title: the movie/series title
        :param int season: the series season number
        :param int episode: the series episode number
        :param int year: the movie/series year
        :return: a list of subtitles that matches the query parameters
        :rtype: `list` of :class:`~subliminal.providers.LegendasTvSubtitle`

        """
        candidates = self.search_candidates(title, season, episode, year)

        # The language code used by legendas.tv
        language_code = language.legendastv

        # Regex to extract rating information (number of downloads and rate). e.g.: 12345 downloads, nota 10
        rating_info_re = re.compile(r'(\d*) downloads, nota (\d{0,2})')

        # Regex to extract the last update timestamp. e.g.: 25/12/2014 - 19:25
        timestamp_info_re = re.compile(r'(\d{1,2}/\d{1,2}/\d{2,4} - \d{1,2}:\d{1,2})')

        # Regex to identify the 'pack' suffix that candidates might have. e.g.: (p)Breaking.Bad.S05.HDTV.x264
        pack_name_re = re.compile(r'^\(p\)')

        # Regex to extract the subtitle_id from the 'href'. e.g.: /download/560014472eb4d/foo/bar
        subtitle_href_re = re.compile(r'/download/(\w+)/.+')

        subtitles = []
        # loop over matched movies/shows
        for candidate in candidates:
            # page_url: {server_url}/util/carrega_legendas_busca_filme/{title_id}/{language_code}
            candidate_id = candidate.get('id')
            page_url = '%s/util/carrega_legendas_busca_filme/%s/%d' % (self.server_url, candidate_id, language_code)

            # loop over paginated results
            while page_url:
                # query the server
                r = self.session.get(page_url, timeout=TIMEOUT)
                r.raise_for_status()

                soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
                div_tags = soup.find_all('div', {'class': 'f_left'})

                # loop over each div which contains information about a single subtitle
                for div in div_tags:
                    a_tag = div.p.a
                    a_tag_text = a_tag.string if isinstance(a_tag.string, str) else a_tag.string.encode('utf-8')

                    # Removing forward slashes from the candidate name (a common practice on legendas.tv), since they
                    # mislead guessit into identifying the candidate name as a file inside a specific folder (which is wrong).
                    candidate_name = pack_name_re.sub('', a_tag_text).replace('/', '.')
                    page_link = a_tag['href']
                    subtitle_href_match = subtitle_href_re.search(page_link)
                    subtitle_id = subtitle_href_match.group(1) if subtitle_href_match else None
                    multiple_episodes = bool(div.find_parent('div', {'class': 'pack'}) or
                                             pack_name_re.findall(a_tag_text))
                    featured = bool(div.find_parent('div', {'class': 'destaque'}))
                    rating_info_match = rating_info_re.search(div.text)
                    no_downloads_text = rating_info_match.group(1) if rating_info_match else None
                    no_downloads = int(no_downloads_text) if no_downloads_text and no_downloads_text.isdigit() else None
                    rating_text = rating_info_match.group(2) if rating_info_match else None
                    rating = int(rating_text) if rating_text and rating_text.isdigit() else None
                    timestamp_info_match = timestamp_info_re.search(div.text)
                    timestamp_text = timestamp_info_match.group(1) if timestamp_info_match else None
                    timestamp = datetime.strptime(timestamp_text, '%d/%m/%Y - %H:%M') if timestamp_text else None

                    # Using the candidate name to filter out bad candidates
                    # (wrong type, wrong episode, wrong season or even wrong title)
                    guess = guessit(candidate_name, {'type': candidate.get('type')})
                    if not self.matches(guess, expected_title=title, expected_season=season, expected_episode=episode,
                                        expected_year=year, ignore_episode=multiple_episodes):
                        continue

                    # Unfortunately, the only possible way to know the release names of a specific candidate is to
                    # download the compressed file (rar/zip) and list the file names.
                    handler = LegendasTvArchiveHandler(self)
                    subtitle_names = handler.get_subtitle_names(subtitle_id, timestamp)

                    if not subtitle_names:
                        continue

                    for name in subtitle_names:
                        # Filtering out bad candidates (one archive might contain subtitles for the whole season,
                        # therefore this filtering is necessary)
                        guess = guessit(os.path.splitext(name)[0], {'type': candidate.get('type')})
                        if not self.matches(guess, expected_title=title, expected_season=season,
                                            expected_episode=episode, expected_year=year):
                            continue

                        subtitle = LegendasTvSubtitle(language, page_link, subtitle_id, name, handler.binary_content,
                                                      imdb_id=candidate.get('imdb_id'), type=candidate.get('type'),
                                                      season=candidate.get('season'), year=candidate.get('year'),
                                                      no_downloads=no_downloads, rating=rating, featured=featured,
                                                      multiple_episodes=multiple_episodes, timestamp=timestamp)

                        logger.debug('Found subtitle %s', subtitle)
                        subtitles.append(subtitle)

                next_page_link = soup.find('a', attrs={'class': 'load_more'}, text='carregar mais')
                page_url = self.server_url + next_page_link['href'] if next_page_link else None

        # High quality subtitles should have higher precedence when their scores are equal.
        # no_downloads and rating may be None (see the parsing above), so guard with `or 0`.
        subtitles.sort(key=lambda s: (s.featured, s.no_downloads or 0, s.rating or 0, s.multiple_episodes),
                       reverse=True)

        return subtitles

    def list_subtitles(self, video, languages):
        """
        Returns a list of subtitles for the defined video and requested languages

        :param video: the video to search subtitles for.
        :param languages: the requested languages.
        :return: a list of subtitles for the requested video and languages.
        :rtype: `list` of :class:`~subliminal.providers.LegendasTvSubtitle`

        """
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title
        year = video.year

        return [s for l in languages for s in self.query(l, title, season=season, episode=episode, year=year)]

    def get_subtitle_names(self, content):
        """
        Returns all subtitle names for the given rar/zip binary content.

        :param content: the downloaded binary content (rar/zip)
        :return: list of subtitle names
        :rtype: `list` of `string`

        """
        return self._uncompress(
            content,
            lambda cf: [f for f in cf.namelist()
                        if 'legendas.tv' not in f.lower() and f.lower().endswith(SUBTITLE_EXTENSIONS)])

    def extract_subtitle(self, content, subtitle_name):
        """
        Extracts the subtitle content from the given compressed content: the archive is opened, the entry named
        subtitle_name is uncompressed and its content is returned.

        :param content: the downloaded binary content (rar/zip)
        :param str subtitle_name: the filename to be extracted
        :return: the subtitle content
        :rtype: `string`

        """
        return self._uncompress(content, lambda cf, name: fix_line_ending(cf.read(name)), subtitle_name)

    def _uncompress(self, content, function, *args, **kwargs):
        bc = io.BytesIO(content)

        cf = RarFile(bc) if is_rarfile(bc) else (ZipFile(bc) if is_zipfile(bc) else None)

        return function(cf, *args, **kwargs) if cf else None

    def download_content(self, subtitle_id, timestamp):
        """
        Downloads the compressed file for the specified subtitle_id. The timestamp is required in order to avoid
        stale cache hits when the compressed file is updated (it's a common practice on legendas.tv to update an
        archive with new subtitles).

        :param str subtitle_id: the id used to download the compressed file
        :param str timestamp: the last update timestamp of the file
        :return: the downloaded file content
        :rtype: `bytes`

        """
        logger.debug('Downloading subtitle_id %s. Last update on %s', subtitle_id, timestamp)
        r = self.session.get('%s/downloadarquivo/%s' % (self.server_url, subtitle_id), timeout=TIMEOUT)
        r.raise_for_status()

        return r.content

    def download_subtitle(self, subtitle):
        bc = subtitle.binary_content if subtitle.binary_content else \
            self.download_content(subtitle.subtitle_id, subtitle.timestamp)

        subtitle.content = self.extract_subtitle(bc, subtitle.name)
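
The query method above walks paginated results by following the 'load_more'
link until the server stops advertising a next page. A minimal standalone
sketch of that pagination pattern; the URLs and markup below are placeholders
for illustration, not the real legendas.tv endpoints:

from requests import Session
from bs4 import BeautifulSoup

def iter_result_pages(session, first_url, server_url, timeout=10):
    """Yield one parsed result page at a time, following 'load_more' links."""
    page_url = first_url
    while page_url:
        r = session.get(page_url, timeout=timeout)
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser')
        yield soup
        next_link = soup.find('a', attrs={'class': 'load_more'})
        page_url = server_url + next_link['href'] if next_link else None

# usage sketch:
# for soup in iter_result_pages(Session(), 'https://example.com/page/1', 'https://example.com'):
#     ...  # scrape each page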
Code Example #41
0
File: itasa.py Project: Djang0/SickRage-1
class ItaSAProvider(Provider):
    languages = {Language('ita')}

    video_types = (Episode, )

    server_url = 'https://api.italiansubs.net/api/rest/'

    apikey = 'd86ad6ec041b334fac1e512174ee04d5'

    def __init__(self, username=None, password=None):
        if (username is not None and password is None) or (username is None and password is not None):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False
        self.login_itasa = False
        self.session = None
        self.auth_code = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/%s' % __version__

        # login
        if self.username is not None and self.password is not None:
            logger.info('Logging in')
            params = {
                'username': self.username,
                'password': self.password,
                'apikey': self.apikey
            }

            r = self.session.get(self.server_url + 'users/login',
                                 params=params,
                                 allow_redirects=False,
                                 timeout=10)
            root = etree.fromstring(r.content)

            if root.find('status').text == 'fail':
                raise AuthenticationError(root.find('error/message').text)

            self.auth_code = root.find('data/user/authcode').text

            data = {
                'username': self.username,
                'passwd': self.password,
                'remember': 'yes',
                'option': 'com_user',
                'task': 'login',
                'silent': 'true'
            }
            r = self.session.post('http://www.italiansubs.net/index.php',
                                  data=data,
                                  allow_redirects=False,
                                  timeout=30)
            r.raise_for_status()

            self.logged_in = True

    def terminate(self):
        self.session.close()
        self.logged_in = False

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        params = {'apikey': self.apikey}
        r = self.session.get(self.server_url + 'shows',
                             timeout=10,
                             params=params)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        # populate the show ids
        show_ids = {}
        for show in root.findall('data/shows/show'):
            if show.find('name').text is None:  # pragma: no cover
                continue
            show_ids[sanitize(show.find('name').text).lower()] = int(
                show.find('id').text)
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_show_id(self, series):
        """Search the show id from the `series`

        :param str series: series of the episode.
        :return: the show id, if found.
        :rtype: int or None

        """
        # build the param
        params = {'apikey': self.apikey, 'q': series}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'shows/search',
                             params=params,
                             timeout=10)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        if int(root.find('data/count').text) == 0:
            logger.warning('Show id not found: no suggestion')
            return None

        # Looking for show in first page
        for show in root.findall('data/shows/show'):
            if sanitize(show.find('name').text).lower() == sanitize(
                    series.lower()):
                show_id = int(show.find('id').text)
                logger.debug('Found show id %d', show_id)

                return show_id

        # Not in the first page of results; try the next page (if any)
        next_page = root.find('data/next')
        while next_page.text is not None:  # pragma: no cover

            r = self.session.get(next_page.text, timeout=10)
            r.raise_for_status()
            root = etree.fromstring(r.content)

            logger.info('Loading suggestion page %r',
                        root.find('data/page').text)

            # Looking for show in following pages
            for show in root.findall('data/shows/show'):
                if sanitize(show.find('name').text).lower() == sanitize(
                        series.lower()):
                    show_id = int(show.find('id').text)
                    logger.debug('Found show id %d', show_id)

                    return show_id

            next_page = root.find('data/next')

        # No matches found
        logger.warning('Show id not found: suggestions do not match')

        return None

    def get_show_id(self, series, country_code=None):
        """Get the best matching show id for `series`.

        First searches the result of :meth:`_get_show_ids` and falls back to a search with :meth:`_search_show_id`.

        :param str series: series of the episode.
        :param str country_code: the country in which the show is aired.
        :return: the show id, if found.
        :rtype: int or None

        """
        series_sanitized = sanitize(series).lower()
        show_ids = self._get_show_ids()
        show_id = None

        # attempt with country
        if not show_id and country_code:
            logger.debug('Getting show id with country')
            show_id = show_ids.get('%s %s' %
                                   (series_sanitized, country_code.lower()))

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            show_id = show_ids.get(series_sanitized)

        # search as last resort
        if not show_id:
            logger.warning('Series not found in show ids')
            show_id = self._search_show_id(series)

        return show_id

    @region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME)
    def _download_zip(self, sub_id):
        # download the subtitle
        logger.info('Downloading subtitle %r', sub_id)

        params = {
            'authcode': self.auth_code,
            'apikey': self.apikey,
            'subtitle_id': sub_id
        }

        r = self.session.get(self.server_url + 'subtitles/download',
                             params=params,
                             timeout=30)
        r.raise_for_status()

        return r.content

    def _get_season_subtitles(self, show_id, season, sub_format):
        params = {
            'apikey': self.apikey,
            'show_id': show_id,
            'q': 'Stagione %%%d' % season,
            'version': sub_format
        }
        r = self.session.get(self.server_url + 'subtitles/search',
                             params=params,
                             timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        if int(root.find('data/count').text) == 0:
            logger.warning(
                'Subtitles for season not found, trying with rip suffix')

            params['version'] = sub_format + 'rip'
            r = self.session.get(self.server_url + 'subtitles/search',
                                 params=params,
                                 timeout=30)
            r.raise_for_status()
            root = etree.fromstring(r.content)
            if int(root.find('data/count').text) == 0:
                logger.warning('Subtitles for season not found')
                return []

        subs = []
        # Looking for subtitles in first page
        season_re = re.compile('.*?stagione 0*?%d.*' % season)
        for subtitle in root.findall('data/subtitles/subtitle'):
            if season_re.match(subtitle.find('name').text.lower()):
                logger.debug('Found season zip id %d - %r - %r',
                             int(subtitle.find('id').text),
                             subtitle.find('name').text,
                             subtitle.find('version').text)

                content = self._download_zip(int(subtitle.find('id').text))
                if not is_zipfile(io.BytesIO(content)):  # pragma: no cover
                    if b'limite di download' in content:  # content is bytes
                        raise TooManyRequests()
                    else:
                        raise ConfigurationError('Not a zip file: %r' %
                                                 content)

                with ZipFile(io.BytesIO(content)) as zf:
                    episode_re = re.compile(r's(\d{1,2})e(\d{1,2})')
                    for index, name in enumerate(zf.namelist()):
                        match = episode_re.search(name)
                        if not match:  # pragma: no cover
                            logger.debug('Cannot decode subtitle %r', name)
                        else:
                            sub = ItaSASubtitle(
                                int(subtitle.find('id').text),
                                subtitle.find('show_name').text,
                                int(match.group(1)), int(match.group(2)), None,
                                None, None, name)
                            sub.content = fix_line_ending(zf.read(name))
                            subs.append(sub)

        return subs

    def query(self,
              series,
              season,
              episode,
              video_format,
              resolution,
              country=None):

        # To make queries you need to be logged in
        if not self.logged_in:  # pragma: no cover
            raise ConfigurationError('Cannot query if not logged in')

        # get the show id
        show_id = self.get_show_id(series, country)
        if show_id is None:
            logger.error('No show id found for %r ', series)
            return []

        # get the page of the season of the show
        logger.info(
            'Getting the subtitle of show id %d, season %d episode %d, format %r',
            show_id, season, episode, video_format)
        subtitles = []

        # Default format is SDTV
        if not video_format or video_format.lower() == 'hdtv':
            if resolution in ('1080i', '1080p', '720p'):
                sub_format = resolution
            else:
                sub_format = 'normale'
        else:
            sub_format = video_format.lower()

        # Look for year
        params = {'apikey': self.apikey}
        r = self.session.get(self.server_url + 'shows/' + str(show_id),
                             params=params,
                             timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        year = root.find('data/show/started').text
        if year:
            year = int(year.split('-', 1)[0])
        tvdb_id = root.find('data/show/id_tvdb').text
        if tvdb_id:
            tvdb_id = int(tvdb_id)

        params = {
            'apikey': self.apikey,
            'show_id': show_id,
            'q': '%dx%02d' % (season, episode),
            'version': sub_format
        }
        r = self.session.get(self.server_url + 'subtitles/search',
                             params=params,
                             timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        if int(root.find('data/count').text) == 0:
            logger.warning('Subtitles not found, trying with rip suffix')

            params['version'] = sub_format + 'rip'
            r = self.session.get(self.server_url + 'subtitles/search',
                                 params=params,
                                 timeout=30)
            r.raise_for_status()
            root = etree.fromstring(r.content)
            if int(root.find('data/count').text) == 0:
                logger.warning('Subtitles not found, falling back to season mode')

                # If no subtitles are found for the single episode, try to download the whole-season zip
                subs = self._get_season_subtitles(show_id, season, sub_format)
                if subs:
                    for subtitle in subs:
                        subtitle.format = video_format
                        subtitle.year = year
                        subtitle.tvdb_id = tvdb_id

                    return subs
                else:
                    return []

        # Looking for subtitles in first page
        for subtitle in root.findall('data/subtitles/subtitle'):
            if '%dx%02d' % (season,
                            episode) in subtitle.find('name').text.lower():

                logger.debug('Found subtitle id %d - %r - %r',
                             int(subtitle.find('id').text),
                             subtitle.find('name').text,
                             subtitle.find('version').text)

                sub = ItaSASubtitle(int(subtitle.find('id').text),
                                    subtitle.find('show_name').text, season,
                                    episode, video_format, year, tvdb_id,
                                    subtitle.find('name').text)

                subtitles.append(sub)

        # Not in the first page of results; try the next page (if any)
        next_page = root.find('data/next')
        while next_page.text is not None:  # pragma: no cover

            r = self.session.get(next_page.text, timeout=30)
            r.raise_for_status()
            root = etree.fromstring(r.content)

            logger.info('Loading subtitles page %r', root.find('data/page').text)

            # Looking for show in following pages
            for subtitle in root.findall('data/subtitles/subtitle'):
                if '%dx%02d' % (season,
                                episode) in subtitle.find('name').text.lower():

                    logger.debug('Found subtitle id %d - %r - %r',
                                 int(subtitle.find('id').text),
                                 subtitle.find('name').text,
                                 subtitle.find('version').text)

                    sub = ItaSASubtitle(int(subtitle.find('id').text),
                                        subtitle.find('show_name').text,
                                        season, episode, video_format, year,
                                        tvdb_id,
                                        subtitle.find('name').text)

                    subtitles.append(sub)

            next_page = root.find('data/next')

        # Download the subs found; a zip may contain more than one
        additional_subs = []
        for sub in subtitles:

            # open the zip
            content = self._download_zip(sub.sub_id)
            if not is_zipfile(io.BytesIO(content)):  # pragma: no cover
                if b'limite di download' in content:  # content is bytes
                    raise TooManyRequests()
                else:
                    raise ConfigurationError('Not a zip file: %r' % content)

            with ZipFile(io.BytesIO(content)) as zf:
                if len(zf.namelist()) > 1:  # pragma: no cover

                    for index, name in enumerate(zf.namelist()):

                        if index == 0:
                            # First element
                            sub.content = fix_line_ending(zf.read(name))
                            sub.full_data = name
                        else:
                            add_sub = copy.deepcopy(sub)
                            add_sub.content = fix_line_ending(zf.read(name))
                            add_sub.full_data = name
                            additional_subs.append(add_sub)
                else:
                    sub.content = fix_line_ending(zf.read(zf.namelist()[0]))
                    sub.full_data = zf.namelist()[0]

        return subtitles + additional_subs

    def list_subtitles(self, video, languages):
        return self.query(video.series, video.season, video.episode,
                          video.format, video.resolution)

    def download_subtitle(self, subtitle):  # pragma: no cover
        pass
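
The season-mode fallback in _get_season_subtitles above boils down to:
download a single zip for the whole season, then map each member file to its
episode via an sXXeYY pattern. A hedged, self-contained sketch of that step:

import io
import re
from zipfile import ZipFile

def subtitles_from_season_zip(content):
    """Map (season, episode) -> raw subtitle bytes for each zip member."""
    episode_re = re.compile(r's(\d{1,2})e(\d{1,2})', re.IGNORECASE)
    episodes = {}
    with ZipFile(io.BytesIO(content)) as zf:
        for name in zf.namelist():
            match = episode_re.search(name)
            if match:
                episodes[(int(match.group(1)), int(match.group(2)))] = zf.read(name)
    return episodes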
Code Example #42
0
class User(object):
    """
    class for user credentials and sending and posting requests

    Attributes
    ----------
    session : requests.Session
    username : str
    logged_in : bool
        Boolean standing for login state. True if logged in
    """
    def __init__(self):
        super(User, self).__init__()
        self.session = Session()
        self.logged_in = False
        self.username = None

    def __del__(self):
        try:
            self.session.close()
        except TypeError:
            pass

    def login(self, username):
        """
        Sets the attributes according to login
        """
        self.username = username
        self.logged_in = True
        return self

    def logout(self):
        """
        Clear the login state.
        """
        self.logged_in = False

    def check_login(self):
        """
        Raise an error if user is not logged in
        """
        if self.logged_in is False:
            raise AUTHError('%s is not logged in.' % self.username)

    def post(self, url, **kwargs):
        """
        Wrap session post
        """
        response = self.session.post(url, **kwargs)
        return response

    def get(self, url, **kwargs):
        """
         Wrap session get
         """
        response = self.session.get(url, **kwargs)
        return response

    @staticmethod
    def check_response(response):
        """
        Check for errors in a REST call
        """
        if response.ok:
            return response.json()
        else:
            response.raise_for_status()
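
A hypothetical round trip with the User wrapper above (the URL is a
placeholder, not a real endpoint):

user = User().login('alice')
user.check_login()  # raises AUTHError only when login() was never called
payload = User.check_response(user.get('https://api.example.com/items'))
user.logout()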
Code Example #43
0
File: 48001.py Project: ziqi521/exploitdb
            pass
    count = i  # does not include header
    print("[+] Complete. {} users written to file '{}'".format(
        count, filename))
    print("[+] Sample Content:")
    with open(filename) as f:
        for n in range(2):
            print(",".join(f.readline().split("\t")), end="")


def dumpSysInfo():
    url = base_url + "/servlet/com.threeis.webta.H200mnuAdmin"
    data = {"selFunc": "about"}
    resp = web_req(url, data)
    html = resp.text
    data = re.findall(r'<INPUT VALUE="(.*?)"', html, re.DOTALL)
    print("[+] " + data[0])


if __name__ == '__main__':
    print(banner)
    login()
    findAdmins()
    privesc()
    login()  # login again because we need the refreshed perms after privesc
    dumpSysInfo()
    #stealPII()
    if xss:
        storeXSS()
    s.close()
Code Example #44
0
class AssrtProvider(Provider):
    """Assrt Provider."""
    languages = {Language(*l) for l in supported_languages}
    video_types = (Episode, Movie)

    def __init__(self, token=None):
        if not token:
            raise ConfigurationError('Token must be specified')
        self.token = token

    def initialize(self):
        self.session = Session()
        self.session.headers = {
            'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
        }

    def terminate(self):
        self.session.close()

    def query(self, languages, video):
        # query the server
        keywords = []
        if isinstance(video, Movie):
            if video.title:
                # title = "".join(e for e in video.title if e.isalnum())
                title = video.title
                keywords.append(title)
            if video.year:
                keywords.append(str(video.year))
        elif isinstance(video, Episode):
            if video.series:
                # series = "".join(e for e in video.series if e.isalnum())
                series = video.series
                keywords.append(series)
            if video.season and video.episode:
                keywords.append('S%02dE%02d' % (video.season, video.episode))
            elif video.episode:
                keywords.append('E%02d' % video.episode)
        query = ' '.join(keywords)

        params = {'token': self.token, 'q': query, 'is_file': 1}
        logger.debug('Searching subtitles: GET /sub/search %r', params)
        res = self.session.get(server_url + '/sub/search',
                               params=params,
                               timeout=10)
        res.raise_for_status()
        result = res.json()

        if result['status'] != 0:
            logger.error('status error: %r', result['status'])
            return []

        # parse the subtitles
        pattern = re.compile(r'lang(?P<code>\w+)')
        subtitles = []
        for sub in result['sub']['subs']:
            if 'lang' not in sub:
                continue
            for key in sub['lang']['langlist'].keys():
                match = pattern.match(key)
                try:
                    language = Language.fromassrt(match.group('code'))
                    output_language = search_language_in_list(
                        language, languages)
                    if output_language:
                        subtitles.append(
                            AssrtSubtitle(output_language, sub['id'],
                                          sub['videoname'], self.session,
                                          self.token))
                except Exception:
                    # no regex match or unknown language code
                    pass

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(languages, video)

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        subtitle.content = fix_line_ending(r.content)
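
The usual provider lifecycle, shown as a hedged sketch: the token value is a
placeholder and `video` is assumed to be a subliminal Movie or Episode
instance.

provider = AssrtProvider(token='your-assrt-token')
provider.initialize()
try:
    subs = provider.list_subtitles(video, {Language('zho'), Language('eng')})
    if subs:
        provider.download_subtitle(subs[0])
finally:
    provider.terminate()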
Code Example #45
0
class OneP_Request:
    def __init__(self,
                 host,
                 https=True,
                 httptimeout=15,
                 headers={},
                 reuseconnection=False,
                 log=None,
                 curldebug=False):
        self.host = ('https://' + host) if https else ('http://' + host)
        self.https = https
        self.httptimeout = httptimeout
        self.headers = headers
        self.reuseconnection = reuseconnection
        self.session = Session()
        self.session.headers.update(self.headers)
        self.log = log
        self.curldebug = curldebug

    def request(self,
                method,
                path,
                body=None,
                headers={},
                exception_fn=None,
                notimeout=False,
                verify=True):
        """Wraps HTTPConnection.request. On exception it calls exception_fn
        with the exception object. If exception_fn is None, it re-raises the
        exception. If notimeout is True, create a new connection (regardless of
        self.reuseconnection setting) that uses the global default timeout for
        sockets (usually None)."""
        # This needs to be done first because self.session may be None
        if not self.reuseconnection or notimeout:
            self.close()
            self.session = Session()

        # Copy so the caller's dict (and the shared default) is not mutated.
        allheaders = dict(headers)
        allheaders.update(self.session.headers)

        try:
            if self.curldebug:
                # output request as a curl call
                def escape(s):
                    """escape single quotes for bash"""
                    return s.replace("'", "'\\''")

                self.log.debug("curl '{1}{2}' -X {3} -m {4} {5} {6}".format(
                    'https' if self.https else 'http', self.host, path, method,
                    self.httptimeout, ' '.join([
                        '-H \'{0}: {1}\''.format(escape(h),
                                                 escape(allheaders[h]))
                        for h in allheaders
                    ]), '' if body is None else '-d \'' + escape(body) + '\''))
            else:
                self.log.debug("%s %s\nHost: %s\nHeaders: %s" %
                               (method, path, self.host, allheaders))
                if body is not None:
                    self.log.debug("Body: %s" % body)
            URI = self.host + path
            prepped = self.session.prepare_request(
                Request(method, URI, data=body, headers=headers))

            response = self.session.send(
                prepped,
                verify=verify,
                timeout=None if notimeout else self.httptimeout)
            return response.text, response

        except Exception:
            self.close()
            ex = sys.exc_info()[1]
            if exception_fn is not None:
                exception_fn(ex)
            else:
                raise ex

    def close(self):
        """Closes any open connection. This should only need to be called if
        reuseconnection is set to True. Once it's closed, the connection may be
        reopened by making another API called."""
        if self.session is not None:
            self.session.close()
            self.session = None
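
A hypothetical call against the wrapper above. Note that request() logs via
self.log unconditionally, so a real logger must be supplied; host and path are
placeholders:

import logging

conn = OneP_Request('api.example.com', https=True, log=logging.getLogger(__name__))
body, response = conn.request('GET', '/status', headers={'Accept': 'application/json'})
conn.close()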
Code Example #46
0
    def _collect_sapcloudconnector(self):
        #
        #  Uses monitoring API:
        # https://help.sap.com/viewer/cca91383641e40ffbe03bdc78f00f681/Cloud/en-US/f6e7a7bc6af345d2a334c2427a31d294.html
        #
        #  Configuration: make port 8443 available, add the line below to users.xml and restart SCC.
        #
        #  <user username="******" password="******" roles="sccmonitoring"/>
        #
        cloud_connector_url = "{0}:{1}/".format(self.url, "8443").replace(
            "http://", "https://")
        self.log.debug(
            "{0}: Trying to connect to sapcloudconnector on url: {1}".format(
                self.host, cloud_connector_url))
        health_url = cloud_connector_url + "exposed?action=ping"
        #
        #   1 second timeout to connect, 30 to read data.
        #
        status_code = 0
        session = Session()
        session.auth = HTTPBasicAuth(self.user, self.password)
        # requests.Session has no `timeout` attribute; keep the (connect, read)
        # pair and pass it explicitly on each request below.
        timeout = (1, 30)
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        try:
            health = session.get(health_url, timeout=timeout)
            status_code = health.status_code
        except Exception:
            self.log.debug(
                "{0}: No SAP Cloud connector found on url: {1}".format(
                    self.host, health_url))
            status_code = 500

        if status_code == 200:
            self.log.info(
                "{0}: Got health from cloud connector on url: {1}".format(
                    self.host, health_url))

            external_id = str(self._scc_external_id())
            component_data = {
                "name": "SCC",
                "description": "SAP Cloud Connector",
                # "type": "SAP Cloud Connector",
                # "sid": "SCC",
                "host": self.host,
                # "system_number": "99",
                # "version": "v1",
                "domain": self.domain,
                "environment": self.stackstate_environment,
                "tags": self.tags
                # "labels": []
            }
            self.log.debug("{0}: -----> component_data : {1}".format(
                self.host, component_data))
            self.log.debug("{0}: -----> external_id : {1}".format(
                self.host, external_id))
            self.component(external_id, "sap-cloud-connector", component_data)

            # define relation  cloud connector    -->    host
            #                          is hosted on
            source_id = external_id
            target_id = self._host_external_id()
            relation_data = {}
            self.relation(source_id, target_id, "is hosted on", relation_data)

            # define scc status event
            self.event({
                "timestamp": int(time.time()),
                "source_type_name": "SAP:scc state",
                # "source_type_name": "SAP:host instance",
                "msg_title": "SCC status update.",
                "msg_text": "",
                "host": self.host,
                "tags": ["instance_id:99", "status:sapcontrol-green"]
            })
            #
            # Lists sub accounts to the SAP Cloud and connection tunnels
            #
            subaccount_url = cloud_connector_url + "api/monitoring/subaccounts"
            subaccount_reply = session.get(subaccount_url, timeout=timeout)
            if subaccount_reply.status_code == 200:
                reply = subaccount_reply.text.encode('utf-8')
                self.log.debug(
                    "{0}: Sub accounts reply from cloud connector : {1}".
                    format(self.host, reply))
                subaccounts = json.loads(subaccount_reply.text)
                self.log.debug(
                    "{0}: JSON sub accounts from cloud connector : {1}".format(
                        self.host, subaccounts))
                for subaccount in subaccounts["subaccounts"]:
                    self.log.debug("{0}: subaccount: {1}".format(
                        self.host, subaccount))
                    # define cloud connector component
                    subaccount_name = str(subaccount.get("displayName"))
                    # display name is not always setup
                    if subaccount_name == "None":
                        subaccount_name = str(subaccount.get("subaccount"))
                    external_id = str(
                        self._scc_subaccount_external_id(
                            subaccount.get("subaccount")))
                    tunnel = subaccount.get("tunnel")

                    component_data = {
                        "name": subaccount_name,
                        "description": str(subaccount.get("description")),
                        "state": str(tunnel.get("state")),
                        "connectedSince": str(tunnel.get("connectedSince")),
                        "connections": str(tunnel.get("connections")),
                        "user": str(tunnel.get("user")),
                        "regionHost": str(subaccount.get("regionHost")),
                        "subaccount": str(subaccount.get("subaccount")),
                        "locationID": str(subaccount.get("locationID")),
                        "layer": "SAP SCC Sub Accounts",
                        "domain": self.domain,
                        "environment": self.stackstate_environment,
                        "host": self.host,
                        "tags": self.tags
                        # "labels": []
                    }
                    self.log.debug("{0}: -----> component_data : {1}".format(
                        self.host, component_data))
                    self.log.debug("{0}: -----> external_id : {1}".format(
                        self.host, external_id))
                    self.component(external_id, "sap-scc-subaccount",
                                   component_data)

                    # define relation  cloud connector    -->    host
                    #                          is hosted on
                    source_id = external_id
                    target_id = self._scc_external_id()
                    relation_data = {}
                    self.relation(source_id, target_id, "is_setup_on",
                                  relation_data)

                    # define cloud connector status event

                    tunnel_status = self._scc_subaccount_status(
                        tunnel.get("state"))
                    self.event({
                        "timestamp":
                        int(time.time()),
                        "source_type_name":
                        "SAP:scc subaccount state",
                        "msg_title":
                        "SAP Cloud Connector '{0}' status update.".format(
                            subaccount_name),
                        "msg_text":
                        "",
                        "host":
                        self.host,
                        "tags": [
                            "status:{0}".format(tunnel_status),
                            "subaccount_name:{0}".format(subaccount_name)
                        ]
                    })
            else:
                if subaccount_reply.status_code == 400:
                    msg = "{0}: SAP Cloud connector monitoring sub account page not " \
                          "supported in this version of SCC.".format(self.host)
                    self.log.info(msg)
                else:
                    status = subaccount_reply.status_code
                    self.log.error(
                        "{0}: No SAP Cloud connector sub account found. Status code: {1}"
                        .format(self.host, status))
            #
            #   List backend SAP systems and virtual names.
            #
            backends_url = cloud_connector_url + "api/monitoring/connections/backends"
            backends_reply = session.get(backends_url, timeout=timeout)
            if backends_reply.status_code == 200:
                reply = backends_reply.text.encode('utf-8')
                self.log.debug(
                    "{0}: Backends reply from cloud connector : {1}".format(
                        self.host, reply))
                backends = json.loads(backends_reply.text)
                self.log.info(
                    "{0}: JSON backends from cloud connector : {1}".format(
                        self.host, backends))
                for subaccount in backends["subaccounts"]:
                    # subaccount["regionHost"]
                    # subaccount["subaccount"]
                    # subaccount["locationID"]
                    virtualbackend = str(subaccount.get("virtualBackend"))
                    for backend in subaccount["backendConnections"]:
                        external_id = self._scc_backend_external_id(
                            subaccount["subaccount"], virtualbackend)
                        component_data = {
                            "virtualBackend": virtualbackend,
                            "internalBackend":
                            str(backend.get("internalBackend")),
                            "protocol": str(backend.get("protocol")),
                            "idle": str(backend.get("idle")),
                            "active": str(backend.get("active")),
                            "labels": [],
                            "layer": "SAP SCC Back-ends",
                            "domain": self.domain,
                            "environment": self.stackstate_environment,
                            "tags": self.tags
                        }
                        self.log.debug("{0}: ------> external_id : {1}".format(
                            self.host, external_id))
                        self.component(external_id, "sap-cloud",
                                       component_data)
                        # define relation  cloud connector    -->    host
                        #                          is hosted on
                        source_id = external_id
                        target_id = self._scc_subaccount_external_id(
                            subaccount["subaccount"])
                        relation_data = {}
                        self.relation(source_id, target_id, "is connected to",
                                      relation_data)
                        self.event({
                            "timestamp":
                            int(time.time()),
                            "source_type_name":
                            "SAP:cloud component state",
                            "msg_title":
                            "SAP Cloud Connector '{0}' status update.".format(
                                backend["virtualBackend"]),
                            "msg_text":
                            "",
                            "host":
                            self.host,
                            "tags": [
                                "active:{0}".format(backend["active"]),
                                "idle:{0}".format(backend["idle"])
                            ]
                        })
            else:
                if backends_reply.status_code == 400:
                    msg = "{0}: SAP Cloud connector monitoring backend page not supported " \
                          "in this version of SCC.".format(self.host)
                    self.log.info(msg)
                else:
                    status = backends_reply.status_code
                    self.log.error(
                        "{0}: No SAP Cloud connector backends found. Status code: {1}"
                        .format(self.host, status))
        if status_code == 401:
            msg = "{0}: Authentication failed, check your config.yml and SCC users.xml " \
                  "for corresponding username and password.".format(self.host)
            self.log.error(msg)
        session.close()
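
The collector above authenticates every call with HTTP basic auth and needs a
(connect, read) timeout per request, because requests.Session silently ignores
a timeout attribute set on the session. The pattern in isolation (URL and
credentials are placeholders; verify=False mirrors the self-signed-certificate
handling above):

from requests import Session
from requests.auth import HTTPBasicAuth

session = Session()
session.auth = HTTPBasicAuth('sccmonitor', 'secret')
r = session.get('https://scc.example.com:8443/exposed?action=ping',
                timeout=(1, 30), verify=False)
print(r.status_code)
session.close()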
Code Example #47
0
File: base_client.py Project: plivo/plivo-python
class BaseClient(object):
    def __init__(self, auth_id=None, auth_token=None, proxies=None, timeout=5):
        """
        The Plivo API client.

        Deals with all the API requests to be made.
        """

        self.base_uri = PLIVO_API_BASE_URI
        self.session = Session()
        self.session.headers.update({
            'User-Agent': get_user_agent(),
            'Content-Type': 'application/json',
            'Accept': 'application/json',
        })
        self.session.auth = fetch_credentials(auth_id, auth_token)
        self.multipart_session = Session()
        self.multipart_session.headers.update({
            'User-Agent': get_user_agent(),
            'Cache-Control': 'no-cache',
        })
        self.multipart_session.auth = fetch_credentials(auth_id, auth_token)
        self.proxies = proxies
        self.timeout = timeout

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.session.close()
        self.multipart_session.close()

    def process_response(self,
                         method,
                         response,
                         response_type=None,
                         objects_type=None):
        """Processes the API response based on the status codes and method used
        to access the API
        """

        try:
            response_json = response.json(
                object_hook=
                lambda x: ResponseObject(x) if isinstance(x, dict) else x)
            if response_type:
                r = response_type(self, response_json.__dict__)
                response_json = r

            if 'objects' in response_json and objects_type:
                response_json.objects = [
                    objects_type(self, obj.__dict__)
                    for obj in response_json.objects
                ]
        except ValueError:
            response_json = None

        if response.status_code == 400:
            if response_json and 'error' in response_json:
                raise ValidationError(response_json.error)
            raise ValidationError(
                'A parameter is missing or is invalid while accessing resource '
                'at: {url}'.format(url=response.url))

        if response.status_code == 401:
            if response_json and 'error' in response_json:
                raise AuthenticationError(response_json.error)
            raise AuthenticationError(
                'Failed to authenticate while accessing resource at: '
                '{url}'.format(url=response.url))

        if response.status_code == 404:
            if response_json and 'error' in response_json:
                raise ResourceNotFoundError(response_json.error)
            raise ResourceNotFoundError(
                'Resource not found at: {url}'.format(url=response.url))

        if response.status_code == 405:
            if response_json and 'error' in response_json:
                raise InvalidRequestError(response_json.error)
            raise InvalidRequestError(
                'HTTP method "{method}" not allowed to access resource at: '
                '{url}'.format(method=method, url=response.url))

        if response.status_code == 500:
            if response_json and 'error' in response_json:
                raise PlivoServerError(response_json.error)
            raise PlivoServerError(
                'A server error occurred while accessing resource at: '
                '{url}'.format(url=response.url))

        if method == 'DELETE':
            if response.status_code != 204:
                raise PlivoRestError('Resource at {url} could not be '
                                     'deleted'.format(url=response.url))

        elif response.status_code not in [200, 201, 202]:
            raise PlivoRestError(
                'Received status code {status_code} for the HTTP method '
                '"{method}"'.format(
                    status_code=response.status_code, method=method))

        return response_json

    def create_request(self, method, path=None, data=None):
        path = path or []
        req = Request(method, '/'.join([self.base_uri, self.session.auth[0]] +
                                       list([str(p) for p in path])) + '/',
                      **({
                          'params': data
                      } if method == 'GET' else {
                          'json': data
                      }))
        return self.session.prepare_request(req)

    def create_multipart_request(self,
                                 method,
                                 path=None,
                                 data=None,
                                 files=None):
        path = path or []

        data_args = {}
        if method == 'GET':
            data_args['params'] = data
        else:
            data_args['data'] = data
            if files and 'file' in files and files['file'] != '':
                data_args['files'] = files

        req = Request(method,
                      '/'.join([self.base_uri, self.multipart_session.auth[0]]
                               + list([str(p) for p in path])) + '/', **(
                                   data_args))
        return self.multipart_session.prepare_request(req)

    def send_request(self, request, **kwargs):
        if 'session' in kwargs:
            session = kwargs['session']
            del kwargs['session']
        else:
            session = self.session

        return session.send(
            request, proxies=self.proxies, timeout=self.timeout, **kwargs)

    def request(self,
                method,
                path=None,
                data=None,
                response_type=None,
                objects_type=None,
                files=None,
                **kwargs):
        if files is not None:
            req = self.create_multipart_request(method, path, data, files)
            session = self.multipart_session
        else:
            req = self.create_request(method, path, data)
            session = self.session
        kwargs['session'] = session
        res = self.send_request(req, **kwargs)
        return self.process_response(method, res, response_type, objects_type)
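A minimal usage sketch of the request pipeline above: create_request() builds a prepared request against base_uri, send_request() dispatches it on the right session, and process_response() maps the status codes handled above to exceptions. The client class name and credentials below are placeholders, not a confirmed public API.

# Hypothetical client exposing the methods above; auth id/token are placeholders.
client = RestClient('MY_AUTH_ID', 'MY_AUTH_TOKEN')

# GET <base_uri>/<auth_id>/Message/?limit=5; a 401/404/405/500 response would
# raise AuthenticationError/ResourceNotFoundError/InvalidRequestError/PlivoServerError.
messages = client.request('GET', ['Message'], data={'limit': 5})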
コード例 #48
0
class SubsSabBzProvider(Provider):
    """SubsSabBz Provider."""
    languages = {Language('por', 'BR')} | {Language(l) for l in [
        'bul', 'eng'
    ]}

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
        self.session.headers["Accept-Encoding"] = "gzip, deflate, br"
        self.session.headers["DNT"] = "1"
        self.session.headers["Connection"] = "keep-alive"
        self.session.headers["Upgrade-Insecure-Requests"] = "1"
        self.session.headers["Cache-Control"] = "max-age=0"

    def terminate(self):
        self.session.close()

    def query(self, language, video):
        subtitles = []
        isEpisode = isinstance(video, Episode)

        params = {
            'act': 'search',
            'movie': '',
            'select-language': '2',
            'upldr': '',
            'yr': '',
            'release': ''
        }

        if isEpisode:
            params['movie'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode)
        else:
            params['yr'] = video.year
            params['movie'] = video.title

        if language in ('en', 'eng'):
            params['select-language'] = '1'

        logger.info('Searching subtitle %r', params)
        response = self.session.post('http://subs.sab.bz/index.php?', params=params, allow_redirects=False, timeout=10, headers={
            'Referer': 'http://subs.sab.bz/',
            })

        response.raise_for_status()

        # raise_for_status() covers 4xx/5xx; with allow_redirects=False a
        # redirect (3xx) can still come back, which also means no results
        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'html.parser')
        rows = soup.findAll('tr', {'class': 'subs-row'})

        # Search on first 10 rows only
        for row in rows[:10]:
            a_element_wrapper = row.find('td', {'class': 'c2field'})
            if a_element_wrapper:
                element = row.find('a')
                if element:
                    link = element.get('href')
                    logger.info('Found subtitle link %r', link)
                    subtitles += self.download_archive_and_add_subtitle_files(link, language, video)

        return subtitles

    def list_subtitles(self, video, languages):
        return [s for l in languages for s in self.query(l, video)]

    def download_subtitle(self, subtitle):
        pass

    def process_archive_subtitle_files(self, archiveStream, language, video):
        subtitles = []
        media_type = 'episode' if isinstance(video, Episode) else 'movie'
        for file_name in archiveStream.namelist():
            if file_name.lower().endswith(('.srt', '.sub')):
                logger.info('Found subtitle file %r', file_name)
                subtitle = SubsSabBzSubtitle(language, file_name, media_type)
                subtitle.content = archiveStream.read(file_name)
                subtitles.append(subtitle)
        return subtitles

    def download_archive_and_add_subtitle_files(self, link, language, video):
        logger.info('Downloading subtitle %r', link)
        request = self.session.get(link, headers={
            'Referer': 'http://subs.sab.bz/index.php?'
            })
        request.raise_for_status()

        archive_stream = io.BytesIO(request.content)
        if is_rarfile(archive_stream):
            return self.process_archive_subtitle_files(RarFile(archive_stream), language, video)
        elif is_zipfile(archive_stream):
            return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video)
        else:
            raise ValueError('Not a valid archive')
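A minimal sketch of driving this provider, assuming Language and Episode come from babelfish and subliminal as in the upstream project; the episode values are invented.

provider = SubsSabBzProvider()
provider.initialize()
try:
    # Episode(name, series, season, episode) per subliminal; values are placeholders
    video = Episode('Show.Name.S01E02.720p.mkv', 'Show Name', 1, 2)
    found = provider.query(Language('bul'), video)
finally:
    provider.terminate()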
コード例 #49
0
ファイル: sensor.py プロジェクト: pasna/myconfig
class RestDataMod:
    """Class for handling the data retrieval."""
    def __init__(
        self,
        method,
        resource,
        auth,
        headers,
        data,
        verify_ssl,
        timeout=DEFAULT_TIMEOUT,
        proxy_url=None,
    ):
        """Initialize the data object."""
        self._method = method
        self._resource = resource
        self._auth = auth
        self._headers = headers
        self._request_data = data
        self._verify_ssl = verify_ssl
        self._timeout = timeout
        self._http_session = Session()

        if proxy_url is not None:
            self._proxies = {"http": proxy_url, "https": proxy_url}
        else:
            self._proxies = None

        self.data = None
        self.headers = None

    def set_payload(self, payload):
        """Set payload."""
        self._request_data = payload

    def __del__(self):
        """Destroy the http session on destroy."""
        self._http_session.close()

    def set_url(self, url):
        """Set url."""
        self._resource = url

    def update(self):
        """Get the latest data from REST service with provided method."""
        _LOGGER.debug("Updating from %s", self._resource)

        headers = {}
        if self._headers:
            for header_name, header_template in self._headers.items():
                headers[header_name] = header_template.render()

        try:
            response = self._http_session.request(
                self._method,
                self._resource,
                headers=headers,
                auth=self._auth,
                data=self._request_data,
                timeout=self._timeout,
                verify=self._verify_ssl,
                proxies=self._proxies,
            )
            self.data = response.text
            self.headers = response.headers
        except requests.exceptions.RequestException as ex:
            _LOGGER.warning("Error fetching data: %s failed with %s",
                            self._resource, ex)
            self.data = None
            self.headers = None
        except Exception as err:
            _LOGGER.warning("Unknown error: %s", err)
            self.data = None
            self.headers = None
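A minimal sketch of using the class above; the URL and proxy are placeholders, and headers are left empty because the class expects template objects with a render() method:

rest = RestDataMod('GET', 'https://example.com/api/status', None, None, None,
                   verify_ssl=True, proxy_url='http://127.0.0.1:8080')
rest.update()      # fills rest.data / rest.headers, or sets them to None on error
print(rest.data)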
コード例 #50
0
ファイル: addic7ed.py プロジェクト: halali/Auto-Subliminal
class Addic7edProvider(Provider):
    """Addic7ed Provider."""
    languages = {Language('por', 'BR')} | {
        Language(l)
        for l in [
            'ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu',
            'ell', 'eng', 'eus', 'fas', 'fin', 'fra', 'glg', 'heb', 'hrv',
            'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa', 'nld',
            'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi',
            'srp', 'swe', 'tha', 'tur', 'ukr', 'vie', 'zho'
        ]
    }
    video_types = (Episode, )
    server_url = 'http://www.addic7ed.com/'
    subtitle_class = Addic7edSubtitle

    def __init__(self, username=None, password=None, random_user_agent=False):
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False
        self.session = None
        self.random_user_agent = random_user_agent

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = self._get_user_agent()

        # login
        if self.username and self.password:
            logger.info('Logging in')
            data = {
                'username': self.username,
                'password': self.password,
                'Submit': 'Log in'
            }
            r = self.session.post(self.server_url + 'dologin.php',
                                  data,
                                  allow_redirects=False,
                                  timeout=10)

            if r.status_code != 302:
                raise AuthenticationError(self.username)

            logger.debug('Logged in')
            self.logged_in = True

    def terminate(self):
        # logout
        if self.logged_in:
            logger.info('Logging out')
            r = self.session.get(self.server_url + 'logout.php', timeout=10)
            r.raise_for_status()
            logger.debug('Logged out')
            self.logged_in = False

        self.session.close()

    def _get_user_agent(self):
        user_agent = 'Subliminal/%s' % __short_version__
        if self.random_user_agent:
            from autosubliminal.providers.useragents import RANDOM_USER_AGENTS
            user_agent = RANDOM_USER_AGENTS[random.randint(
                0,
                len(RANDOM_USER_AGENTS) - 1)]
            logger.debug('Using random user agent: %s', user_agent)
        return user_agent

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows.php` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + 'shows.php', timeout=10)
        r.raise_for_status()

        # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
        # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
        # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
        show_cells = re.findall(show_cells_re, r.content)
        if show_cells:
            soup = ParserBeautifulSoup(b''.join(show_cells),
                                       ['lxml', 'html.parser'])
        else:
            # If RegEx fails, fall back to original r.content and use 'html.parser'
            soup = ParserBeautifulSoup(r.content, ['html.parser'])

        # populate the show ids
        show_ids = {}
        for show in soup.select('td.version > h3 > a[href^="/show/"]'):
            show_ids[sanitize(show.text)] = int(show['href'][6:])
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: int

        """
        # addic7ed doesn't support search with quotes
        series = series.replace('\'', ' ')

        # build the params
        series_year = '%s %d' % (series, year) if year is not None else series
        params = {'search': series_year, 'Submit': 'Search'}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'search.php',
                             params=params,
                             timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # get the suggestion
        suggestion = soup.select('span.titulo > a[href^="/show/"]')
        if not suggestion:
            logger.warning('Show id not found: no suggestion')
            return None
        if sanitize(suggestion[0].i.text.replace(
                '\'', ' ')) != sanitize(series_year):
            logger.warning('Show id not found: suggestion does not match')
            return None
        show_id = int(suggestion[0]['href'][6:])
        logger.debug('Found show id %d', show_id)

        return show_id

    def get_show_id(self, series, year=None, country_code=None):
        """Get the best matching show id for `series`, `year` and `country_code`.

        First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :param country_code: country code of the series, if any.
        :type country_code: str
        :return: the show id, if found.
        :rtype: int

        """
        series_sanitized = sanitize(series).lower()
        show_ids = self._get_show_ids()
        show_id = None

        # attempt with country
        if not show_id and country_code:
            logger.debug('Getting show id with country')
            show_id = show_ids.get('%s %s' %
                                   (series_sanitized, country_code.lower()))

        # attempt with year
        if not show_id and year:
            logger.debug('Getting show id with year')
            show_id = show_ids.get('%s %d' % (series_sanitized, year))

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            show_id = show_ids.get(series_sanitized)

        # search as last resort
        if not show_id:
            logger.warning('Series not found in show ids')
            show_id = self._search_show_id(series)

        return show_id

    def query(self, series, season, year=None, country=None):
        # get the show id
        show_id = self.get_show_id(series, year, country)
        if show_id is None:
            logger.error('No show id found for %r (%r)', series, {
                'year': year,
                'country': country
            })
            return []

        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id,
                    season)
        r = self.session.get(self.server_url + 'show/%d' % show_id,
                             params={'season': season},
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            # Provider returns a status of 304 Not Modified with an empty content
            # raise_for_status won't raise exception for that status code
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over subtitle rows
        match = series_year_re.match(
            soup.select('#header font')[0].text.strip()[:-10])
        series = match.group('series')
        year = int(match.group('year')) if match.group('year') else None
        subtitles = []
        for row in soup.select('tr.epeven'):
            cells = row('td')

            # ignore incomplete subtitles
            status = cells[5].text
            if status != 'Completed':
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # read the item
            language = Language.fromaddic7ed(cells[3].text)
            hearing_impaired = bool(cells[6].text)
            page_link = self.server_url + cells[2].a['href'][1:]
            season = int(cells[0].text)
            episode = int(cells[1].text)
            title = cells[2].text
            version = cells[4].text
            download_link = cells[9].a['href'][1:]

            subtitle = self.subtitle_class(language, hearing_impaired,
                                           page_link, series, season, episode,
                                           title, year, version, download_link)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        titles = [video.series] + video.alternative_series
        for title in titles:
            subtitles = [
                s for s in self.query(title, video.season, video.year)
                if s.language in languages and s.episode == video.episode
            ]
            if subtitles:
                return subtitles

        return []

    def download_subtitle(self, subtitle):
        # download the subtitle
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url + subtitle.download_link,
                             headers={'Referer': subtitle.page_link},
                             timeout=10)
        r.raise_for_status()

        if not r.content:
            # Provider returns a status of 304 Not Modified with an empty content
            # raise_for_status won't raise exception for that status code
            logger.debug(
                'Unable to download subtitle. No data returned from provider')
            return

        # detect download limit exceeded
        if r.headers['Content-Type'] == 'text/html':
            raise DownloadLimitExceeded

        subtitle.content = fix_line_ending(r.content)
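A minimal sketch of the provider lifecycle above; anonymous use (no credentials) simply skips the login step, and the series title is a placeholder:

provider = Addic7edProvider(random_user_agent=True)
provider.initialize()
try:
    show_id = provider.get_show_id('Some Show')   # placeholder title
    if show_id is not None:
        subs = provider.query('Some Show', 1)     # season 1
finally:
    provider.terminate()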
コード例 #51
0
ファイル: itasa.py プロジェクト: SiCKRAGETV/SiCKRAGE
class ItaSAProvider(Provider):
    languages = {Language('ita')}

    video_types = (Episode,)

    server_url = 'https://api.italiansubs.net/api/rest/'

    apikey = 'd86ad6ec041b334fac1e512174ee04d5'

    def __init__(self, username=None, password=None):
        if (username is None) != (password is None):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False
        self.login_itasa = False
        self.session = None
        self.auth_code = None

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__version__)

        # login
        if self.username is not None and self.password is not None:
            logger.info('Logging in')
            params = {
                'username': self.username,
                'password': self.password,
                'apikey': self.apikey
            }

            r = self.session.get(self.server_url + 'users/login', params=params, timeout=10)
            root = etree.fromstring(r.content)

            if root.find('status').text == 'fail':
                raise AuthenticationError(root.find('error/message').text)

            self.auth_code = root.find('data/user/authcode').text

            data = {
                'username': self.username,
                'passwd': self.password,
                'remember': 'yes',
                'option': 'com_user',
                'task': 'login',
                'silent': 'true'
            }
            r = self.session.post('http://www.italiansubs.net/index.php', data=data, timeout=30)
            r.raise_for_status()

            self.logged_in = True

    def terminate(self):
        self.session.close()
        self.logged_in = False

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows` page.

        :return: show id per series, lower case and without quotes.
        :rtype: dict

        """
        # get the show page
        logger.info('Getting show ids')
        params = {'apikey': self.apikey}
        r = self.session.get(self.server_url + 'shows', timeout=10, params=params)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        # populate the show ids
        show_ids = {}
        for show in root.findall('data/shows/show'):
            if show.find('name').text is None:  # pragma: no cover
                continue
            show_ids[sanitize(show.find('name').text).lower()] = int(show.find('id').text)
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_show_id(self, series):
        """Search the show id from the `series`

        :param str series: series of the episode.
        :return: the show id, if found.
        :rtype: int or None

        """
        # build the param
        params = {'apikey': self.apikey, 'q': series}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'shows/search', params=params, timeout=10)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        if int(root.find('data/count').text) == 0:
            logger.warning('Show id not found: no suggestion')
            return None

        # Looking for show in first page
        for show in root.findall('data/shows/show'):
            if sanitize(show.find('name').text).lower() == sanitize(series.lower()):
                show_id = int(show.find('id').text)
                logger.debug('Found show id %d', show_id)

                return show_id

        # Not on the first page of results, try the next page (if any)
        next_page = root.find('data/next')
        while next_page.text is not None:  # pragma: no cover

            r = self.session.get(next_page.text, timeout=10)
            r.raise_for_status()
            root = etree.fromstring(r.content)

            logger.info('Loading suggestion page %r', root.find('data/page').text)

            # Looking for show in following pages
            for show in root.findall('data/shows/show'):
                if sanitize(show.find('name').text).lower() == sanitize(series.lower()):
                    show_id = int(show.find('id').text)
                    logger.debug('Found show id %d', show_id)

                    return show_id

            next_page = root.find('data/next')

        # No matches found
        logger.warning('Show id not found: suggestions do not match')

        return None

    def get_show_id(self, series, country_code=None):
        """Get the best matching show id for `series`.

        First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`

        :param str series: series of the episode.
        :param str country_code: the country in which the show is aired.
        :return: the show id, if found.
        :rtype: int or None

        """
        series_sanitized = sanitize(series).lower()
        show_ids = self._get_show_ids()
        show_id = None

        # attempt with country
        if not show_id and country_code:
            logger.debug('Getting show id with country')
            show_id = show_ids.get('{0} {1}'.format(series_sanitized, country_code.lower()))

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            show_id = show_ids.get(series_sanitized)

        # search as last resort
        if not show_id:
            logger.warning('Series not found in show ids')
            show_id = self._search_show_id(series)

        return show_id

    @region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME)
    def _download_zip(self, sub_id):
        # download the subtitle
        logger.info('Downloading subtitle %r', sub_id)

        params = {
            'authcode': self.auth_code,
            'apikey': self.apikey,
            'subtitle_id': sub_id
        }

        r = self.session.get(self.server_url + 'subtitles/download', params=params, timeout=30)
        r.raise_for_status()

        return r.content

    def _get_season_subtitles(self, show_id, season, sub_format):
        params = {
            'apikey': self.apikey,
            'show_id': show_id,
            'q': 'Stagione {}'.format(season),
            'version': sub_format
        }
        r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        if int(root.find('data/count').text) == 0:
            logger.warning('Subtitles for season not found, try with rip suffix')

            params['version'] = sub_format + 'rip'
            r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
            r.raise_for_status()
            root = etree.fromstring(r.content)
            if int(root.find('data/count').text) == 0:
                logger.warning('Subtitles for season not found')
                return []

        subs = []
        # Looking for subtitles in first page
        season_re = re.compile('.*?stagione 0*?{}.*'.format(season))
        for subtitle in root.findall('data/subtitles/subtitle'):
            if season_re.match(subtitle.find('name').text.lower()):
                logger.debug('Found season zip id %d - %r - %r',
                             int(subtitle.find('id').text),
                             subtitle.find('name').text,
                             subtitle.find('version').text)

                content = self._download_zip(int(subtitle.find('id').text))
                if not is_zipfile(io.BytesIO(content)):  # pragma: no cover
                    if 'limite di download' in content:
                        raise TooManyRequests()
                    else:
                        raise ConfigurationError('Not a zip file: {!r}'.format(content))

                with ZipFile(io.BytesIO(content)) as zf:
                    episode_re = re.compile(r's(\d{1,2})e(\d{1,2})')
                    for index, name in enumerate(zf.namelist()):
                        match = episode_re.search(name)
                        if not match:  # pragma: no cover
                            logger.debug('Cannot decode subtitle %r', name)
                        else:
                            sub = ItaSASubtitle(
                                int(subtitle.find('id').text),
                                subtitle.find('show_name').text,
                                int(match.group(1)),
                                int(match.group(2)),
                                None,
                                None,
                                None,
                                name)
                            sub.content = fix_line_ending(zf.read(name))
                            subs.append(sub)

        return subs

    def query(self, series, season, episode, video_format, resolution, country=None):

        # To make queries you need to be logged in
        if not self.logged_in:  # pragma: no cover
            raise ConfigurationError('Cannot query if not logged in')

        # get the show id
        show_id = self.get_show_id(series, country)
        if show_id is None:
            logger.error('No show id found for %r ', series)
            return []

        # get the page of the season of the show
        logger.info('Getting the subtitle of show id %d, season %d episode %d, format %r', show_id,
                    season, episode, video_format)
        subtitles = []

        # Default format is SDTV
        if not video_format or video_format.lower() == 'hdtv':
            if resolution in ('1080i', '1080p', '720p'):
                sub_format = resolution
            else:
                sub_format = 'normale'
        else:
            sub_format = video_format.lower()

        # Look for year
        params = {
            'apikey': self.apikey
        }
        r = self.session.get(self.server_url + 'shows/' + str(show_id), params=params, timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        year = root.find('data/show/started').text
        if year:
            year = int(year.split('-', 1)[0])
        tvdb_id = root.find('data/show/id_tvdb').text
        if tvdb_id:
            tvdb_id = int(tvdb_id)

        params = {
            'apikey': self.apikey,
            'show_id': show_id,
            'q': '{0}x{1:02}'.format(season, episode),
            'version': sub_format
        }
        r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
        r.raise_for_status()
        root = etree.fromstring(r.content)

        if int(root.find('data/count').text) == 0:
            logger.warning('Subtitles not found, try with rip suffix')

            params['version'] = sub_format + 'rip'
            r = self.session.get(self.server_url + 'subtitles/search', params=params, timeout=30)
            r.raise_for_status()
            root = etree.fromstring(r.content)
            if int(root.find('data/count').text) == 0:
                logger.warning('Subtitles not found, falling back to season mode')

                # If no subtitles are found for the single episode, try to download the whole season zip
                subs = self._get_season_subtitles(show_id, season, sub_format)
                if subs:
                    for subtitle in subs:
                        subtitle.format = video_format
                        subtitle.year = year
                        subtitle.tvdb_id = tvdb_id

                    return subs
                else:
                    return []

        # Looking for subtitles in first page
        for subtitle in root.findall('data/subtitles/subtitle'):
            if '{0}x{1:02}'.format(season, episode) in subtitle.find('name').text.lower():
                logger.debug('Found subtitle id %d - %r - %r',
                             int(subtitle.find('id').text),
                             subtitle.find('name').text,
                             subtitle.find('version').text)

                sub = ItaSASubtitle(
                    int(subtitle.find('id').text),
                    subtitle.find('show_name').text,
                    season,
                    episode,
                    video_format,
                    year,
                    tvdb_id,
                    subtitle.find('name').text)

                subtitles.append(sub)

        # Not on the first page of results, try the next page (if any)
        next_page = root.find('data/next')
        while next_page.text is not None:  # pragma: no cover

            r = self.session.get(next_page.text, timeout=30)
            r.raise_for_status()
            root = etree.fromstring(r.content)

            logger.info('Loading subtitles page %r', root.find('data/page').text)

            # Looking for subtitles in the following pages
            for subtitle in root.findall('data/subtitles/subtitle'):
                if '{0}x{1:02}'.format(season, episode) in subtitle.find('name').text.lower():
                    logger.debug('Found subtitle id %d - %r - %r',
                                 int(subtitle.find('id').text),
                                 subtitle.find('name').text,
                                 subtitle.find('version').text)

                    sub = ItaSASubtitle(
                        int(subtitle.find('id').text),
                        subtitle.find('show_name').text,
                        season,
                        episode,
                        video_format,
                        year,
                        tvdb_id,
                        subtitle.find('name').text)

                    subtitles.append(sub)

            next_page = root.find('data/next')

        # Download the subs found, can be more than one in zip
        additional_subs = []
        for sub in subtitles:

            # open the zip
            content = self._download_zip(sub.sub_id)
            if not is_zipfile(io.BytesIO(content)):  # pragma: no cover
                if 'limite di download' in content:
                    raise TooManyRequests()
                else:
                    raise ConfigurationError('Not a zip file: {!r}'.format(content))

            with ZipFile(io.BytesIO(content)) as zf:
                if len(zf.namelist()) > 1:  # pragma: no cover

                    for index, name in enumerate(zf.namelist()):

                        if index == 0:
                            # First element
                            sub.content = fix_line_ending(zf.read(name))
                            sub.full_data = name
                        else:
                            add_sub = copy.deepcopy(sub)
                            add_sub.content = fix_line_ending(zf.read(name))
                            add_sub.full_data = name
                            additional_subs.append(add_sub)
                else:
                    sub.content = fix_line_ending(zf.read(zf.namelist()[0]))
                    sub.full_data = zf.namelist()[0]

        return subtitles + additional_subs

    def list_subtitles(self, video, languages):
        return self.query(video.series, video.season, video.episode, video.format, video.resolution)

    def download_subtitle(self, subtitle):  # pragma: no cover
        pass
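A minimal sketch of the provider above; query() refuses to run without a successful login, so the credentials (placeholders here) must be valid:

provider = ItaSAProvider(username='user', password='secret')  # placeholders
provider.initialize()
try:
    subs = provider.query('Some Show', 1, 2, 'HDTV', '720p')  # placeholder values
finally:
    provider.terminate()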
コード例 #52
0
ファイル: drission.py プロジェクト: g1879/DrissionPage
class Drission(object):
    """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色"""
    def __init__(self,
                 driver_or_options: Union[RemoteWebDriver, Options,
                                          DriverOptions, bool] = None,
                 session_or_options: Union[Session, dict, SessionOptions,
                                           bool] = None,
                 ini_path: str = None,
                 proxy: dict = None):
        """初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象                     \n
        :param driver_or_options: driver对象或DriverOptions、Options类,传入False则创建空配置对象
        :param session_or_options: Session对象或设置字典,传入False则创建空配置对象
        :param ini_path: ini文件路径
        :param proxy: 代理设置
        """
        self._session = None
        self._driver = None
        self._session_options = None
        self._driver_options = None
        self._debugger = None
        self._proxy = proxy

        # ------------------ handle session options ----------------------
        if session_or_options is None:
            self._session_options = SessionOptions(ini_path=ini_path).as_dict()

        elif session_or_options is False:
            self._session_options = SessionOptions(read_file=False).as_dict()

        elif isinstance(session_or_options, Session):
            self._session = session_or_options

        elif isinstance(session_or_options, SessionOptions):
            self._session_options = session_or_options.as_dict()

        elif isinstance(session_or_options, dict):
            self._session_options = session_or_options

        else:
            raise TypeError(
                'session_or_options only accepts Session, dict, SessionOptions or False.')

        # ------------------ handle driver options ----------------------
        if driver_or_options is None:
            self._driver_options = DriverOptions(ini_path=ini_path)

        elif driver_or_options is False:
            self._driver_options = DriverOptions(read_file=False)

        elif isinstance(driver_or_options, RemoteWebDriver):
            self._driver = driver_or_options

        elif isinstance(driver_or_options, (Options, DriverOptions)):
            self._driver_options = driver_or_options

        else:
            raise TypeError(
                'driver_or_options only accepts WebDriver, Options, DriverOptions or False.'
            )

    def __del__(self):
        """关闭对象时关闭浏览器和Session"""
        try:
            self.close()
        except ImportError:
            pass

    @property
    def session(self) -> Session:
        """返回Session对象,如未初始化则按配置信息创建"""
        if self._session is None:
            self._set_session(self._session_options)

            if self._proxy:
                self._session.proxies = self._proxy

        return self._session

    @property
    def driver(self) -> WebDriver:
        """返回WebDriver对象,如未初始化则按配置信息创建。         \n
        如设置了本地调试浏览器,可自动接入或打开浏览器进程。
        """
        if self._driver is None:
            if not self.driver_options.debugger_address and self._proxy:
                self.driver_options.add_argument(
                    f'--proxy-server={self._proxy["http"]}')

            driver_path = self.driver_options.driver_path or 'chromedriver'
            chrome_path = self.driver_options.binary_location or 'chrome.exe'

            # ----------- if a debug port is specified and not in use, start the browser process first -----------
            if self.driver_options.debugger_address and _check_port(
                    self.driver_options.debugger_address) is False:
                from subprocess import Popen
                port = self.driver_options.debugger_address.split(':')[-1]

                # start the browser process and get back the chrome.exe path it uses
                chrome_path, self._debugger = _create_chrome(
                    chrome_path, port, self.driver_options.arguments,
                    self._proxy)

            # ----------- create the WebDriver object -----------
            self._driver = _create_driver(chrome_path, driver_path,
                                          self.driver_options)

            # anti-bot-detection tweak
            try:
                self._driver.execute_script(
                    'Object.defineProperty(navigator,"webdriver",{get:() => undefined,});'
                )
            except Exception:
                pass

            # self._driver.execute_cdp_cmd(
            #     'Page.addScriptToEvaluateOnNewDocument',
            #     {'source': 'Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});'})

        return self._driver

    @property
    def driver_options(self) -> Union[DriverOptions, Options]:
        """返回driver配置信息"""
        return self._driver_options

    @property
    def session_options(self) -> dict:
        """返回session配置信息"""
        return self._session_options

    @session_options.setter
    def session_options(self, options: Union[dict, SessionOptions]) -> None:
        """设置session配置                  \n
        :param options: session配置字典
        :return: None
        """
        self._session_options = _session_options_to_dict(options)
        self._set_session(self._session_options)

    @property
    def proxy(self) -> Union[None, dict]:
        """返回代理信息"""
        return self._proxy

    @proxy.setter
    def proxy(self, proxies: dict = None) -> None:
        """设置代理信息                \n
        :param proxies: 代理信息字典
        :return: None
        """
        self._proxy = proxies

        if self._session:
            self._session.proxies = proxies

        if self._driver:
            cookies = self._driver.get_cookies()
            url = self._driver.current_url
            self._driver.quit()
            self._driver = None
            self._driver = self.driver
            self._driver.get(url)

            for cookie in cookies:
                self.set_cookies(cookie, set_driver=True)

    @property
    def debugger_progress(self):
        """调试浏览器进程"""
        return self._debugger

    def kill_browser(self) -> None:
        """关闭浏览器进程(如果可以)"""
        if self.debugger_progress:
            self.debugger_progress.kill()
            return

        pid = self.get_browser_progress_id()
        from os import popen
        from platform import system

        if pid and system().lower() == 'windows' \
                and popen(f'tasklist | findstr {pid}').read().lower().startswith('chrome.exe'):
            popen(f'taskkill /pid {pid} /F')

        else:
            self._driver.quit()

    def get_browser_progress_id(self) -> Union[str, None]:
        """获取浏览器进程id"""
        if self.debugger_progress:
            return self.debugger_progress.pid

        address = str(self.driver_options.debugger_address).split(':')
        if len(address) == 2:
            ip, port = address
            if ip not in ('127.0.0.1', 'localhost') or not port.isdigit():
                return None

            from os import popen
            txt = ''
            progresses = popen(f'netstat -nao | findstr :{port}').read().split(
                '\n')
            for progress in progresses:
                if 'LISTENING' in progress:
                    txt = progress
                    break
            if not txt:
                return None

            return txt.split(' ')[-1]

    def hide_browser(self) -> None:
        """隐藏浏览器界面"""
        self._show_or_hide_browser()

    def show_browser(self) -> None:
        """显示浏览器界面"""
        self._show_or_hide_browser(False)

    def _show_or_hide_browser(self, hide: bool = True) -> None:
        from platform import system
        if system().lower() != 'windows':
            raise OSError('This method can only be used on Windows.')

        try:
            from win32gui import ShowWindow
            from win32con import SW_HIDE, SW_SHOW
        except ImportError:
            raise ImportError('Please install it first: pip install pypiwin32')

        pid = self.get_browser_progress_id()
        if not pid:
            print('show_browser() and hide_browser() can only be used when '
                  'the debugger_address parameter is set')
            return
        hds = _get_chrome_hwnds_from_pid(pid)
        sw = SW_HIDE if hide else SW_SHOW
        for hd in hds:
            ShowWindow(hd, sw)

    def set_cookies(self,
                    cookies: Union[RequestsCookieJar, list, tuple, str, dict],
                    set_session: bool = False,
                    set_driver: bool = False) -> None:
        """设置cookies                                                      \n
        :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict
        :param set_session: 是否设置session的cookies
        :param set_driver: 是否设置driver的cookies
        :return: None
        """
        cookies = _cookies_to_tuple(cookies)

        for cookie in cookies:
            if cookie['value'] is None:
                cookie['value'] = ''

            # add the cookie to the session
            if set_session:
                kwargs = {
                    x: cookie[x]
                    for x in cookie
                    if x.lower() not in ('name', 'value', 'httponly', 'expiry',
                                         'samesite')
                }

                if 'expiry' in cookie:
                    kwargs['expires'] = cookie['expiry']

                self.session.cookies.set(cookie['name'], cookie['value'],
                                         **kwargs)

            # add the cookie to the driver
            if set_driver:
                if 'expiry' in cookie:
                    cookie['expiry'] = int(cookie['expiry'])

                try:
                    browser_domain = extract(self.driver.current_url).fqdn
                except AttributeError:
                    browser_domain = ''

                if not cookie.get('domain', None):
                    if browser_domain:
                        url = extract(browser_domain)
                        cookie_domain = f'{url.domain}.{url.suffix}'
                    else:
                        raise ValueError('The cookie has no domain and the browser has not visited any URL.')

                    cookie['domain'] = cookie_domain

                else:
                    cookie_domain = cookie['domain'] if cookie['domain'][
                        0] != '.' else cookie['domain'][1:]

                if cookie_domain not in browser_domain:
                    self.driver.get(cookie_domain if cookie_domain.startswith(
                        'http://') else f'http://{cookie_domain}')

                # avoid selenium auto-prefixing a dot, which would prevent correctly overwriting an existing cookie
                if cookie['domain'][0] != '.':
                    c = self.driver.get_cookie(cookie['name'])
                    if c and c['domain'] == cookie['domain']:
                        self.driver.delete_cookie(cookie['name'])

                self.driver.add_cookie(cookie)

    def _set_session(self, data: dict) -> None:
        """根据传入字典对session进行设置    \n
        :param data: session配置字典
        :return: None
        """
        if self._session is None:
            self._session = Session()

        attrs = [
            'headers', 'auth', 'proxies', 'hooks', 'params', 'verify', 'cert',
            'stream', 'trust_env', 'max_redirects'
        ]  # , 'adapters'

        if 'cookies' in data:
            self.set_cookies(data['cookies'], set_session=True)

        for i in attrs:
            if i in data:
                self._session.__setattr__(i, data[i])

    def cookies_to_session(self, copy_user_agent: bool = False) -> None:
        """把driver对象的cookies复制到session对象    \n
        :param copy_user_agent: 是否复制ua信息
        :return: None
        """
        if copy_user_agent:
            user_agent_to_session(self.driver, self.session)

        self.set_cookies(self.driver.get_cookies(), set_session=True)

    def cookies_to_driver(self, url: str) -> None:
        """把session对象的cookies复制到driver对象  \n
        :param url: 作用域
        :return: None
        """
        browser_domain = extract(self.driver.current_url).fqdn
        ex_url = extract(url)

        if ex_url.fqdn not in browser_domain:
            self.driver.get(url)

        domain = f'{ex_url.domain}.{ex_url.suffix}'

        cookies = []
        for cookie in self.session.cookies:
            if cookie.domain == '':
                cookie.domain = domain

            if domain in cookie.domain:
                cookies.append(cookie)

        self.set_cookies(cookies, set_driver=True)

    def close_driver(self, kill: bool = False) -> None:
        """关闭driver和浏览器"""
        if self._driver:
            if kill:
                self.kill_browser()
            else:
                self._driver.quit()

            self._driver = None

    def close_session(self) -> None:
        """关闭session"""
        if self._session:
            self._session.close()
            self._session = None

    def close(self) -> None:
        """关闭session、driver和浏览器"""
        if self._driver:
            self.close_driver()

        if self._session:
            self.close_session()
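A minimal sketch of the class above, assuming a default ini config file and a local Chrome/chromedriver as the project expects; the URL is a placeholder:

drission = Drission()                    # build options from the default ini file
driver = drission.driver                 # WebDriver, created lazily
driver.get('http://example.com')         # placeholder URL
drission.cookies_to_session(copy_user_agent=True)  # sync driver cookies to the session
drission.close()                         # close session, driver and browser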
コード例 #53
0
ファイル: podnapisi.py プロジェクト: BreizhCat/SickRage
class PodnapisiProvider(Provider):
    languages = {Language("por", "BR"), Language("srp", script="Latn")} | {
        Language.fromalpha2(l) for l in language_converters["alpha2"].codes
    }
    video_types = (Episode, Movie)
    server_url = "http://podnapisi.net/subtitles/"

    def initialize(self):
        self.session = Session()
        self.session.headers = {"User-Agent": "Subliminal/%s" % get_version(__version__)}

    def terminate(self):
        self.session.close()

    def query(self, language, keyword, season=None, episode=None, year=None):
        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {"sXML": 1, "sL": str(language), "sK": keyword}
        is_episode = False
        if season and episode:
            is_episode = True
            params["sTS"] = season
            params["sTE"] = episode
        if year:
            params["sY"] = year

        # loop over paginated results
        logger.info("Searching subtitles %r", params)
        subtitles = []
        pids = set()
        while True:
            # query the server
            xml = etree.fromstring(self.session.get(self.server_url + "search/old", params=params, timeout=10).content)

            # exit if no results
            if not int(xml.find("pagination/results").text):
                logger.debug("No subtitles found")
                break

            # loop over subtitles
            for subtitle_xml in xml.findall("subtitle"):
                # read xml elements
                language = Language.fromietf(subtitle_xml.find("language").text)
                hearing_impaired = "n" in (subtitle_xml.find("flags").text or "")
                page_link = subtitle_xml.find("url").text
                pid = subtitle_xml.find("pid").text
                releases = []
                if subtitle_xml.find("release").text:
                    for release in subtitle_xml.find("release").text.split():
                        release = re.sub(r"\.+$", "", release)  # remove trailing dots
                        release = "".join(filter(lambda x: ord(x) < 128, release))  # remove non-ascii characters
                        releases.append(release)
                title = subtitle_xml.find("title").text
                season = int(subtitle_xml.find("tvSeason").text)
                episode = int(subtitle_xml.find("tvEpisode").text)
                year = int(subtitle_xml.find("year").text)

                if is_episode:
                    subtitle = PodnapisiSubtitle(
                        language,
                        hearing_impaired,
                        page_link,
                        pid,
                        releases,
                        title,
                        season=season,
                        episode=episode,
                        year=year,
                    )
                else:
                    subtitle = PodnapisiSubtitle(language, hearing_impaired, page_link, pid, releases, title, year=year)

                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in pids:
                    continue

                logger.debug("Found subtitle %r", subtitle)
                subtitles.append(subtitle)
                pids.add(pid)

            # stop on last page
            if int(xml.find("pagination/current").text) >= int(xml.find("pagination/count").text):
                break

            # increment current page
            params["page"] = int(xml.find("pagination/current").text) + 1
            logger.debug("Getting page %d", params["page"])

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            return [
                s
                for l in languages
                for s in self.query(l, video.series, season=video.season, episode=video.episode, year=video.year)
            ]
        elif isinstance(video, Movie):
            return [s for l in languages for s in self.query(l, video.title, year=video.year)]

    def download_subtitle(self, subtitle):
        # download as a zip
        logger.info("Downloading subtitle %r")
        r = self.session.get(self.server_url + subtitle.pid + "/download", params={"container": "zip"}, timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            if len(zf.namelist()) > 1:
                raise ProviderError("More than one file to unzip")

            subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))
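A minimal sketch of the provider above; Language comes from babelfish as in subliminal, and the keyword is a placeholder:

provider = PodnapisiProvider()
provider.initialize()
try:
    subs = provider.query(Language('eng'), 'Some Show', season=1, episode=1)
    if subs:
        provider.download_subtitle(subs[0])  # fills subs[0].content from the zip
finally:
    provider.terminate()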
コード例 #54
0
ファイル: podnapisi.py プロジェクト: rcgary/Sub-Zero.bundle
class PodnapisiProvider(Provider):
    """Podnapisi Provider."""
    languages = ({Language('por', 'BR'),
                  Language('srp', script='Latn')} | {
                      Language.fromalpha2(l)
                      for l in language_converters['alpha2'].codes
                  })
    server_url = 'http://podnapisi.net/subtitles/'
    subtitle_class = PodnapisiSubtitle

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__

    def terminate(self):
        self.session.close()

    def query(self, language, keyword, season=None, episode=None, year=None):
        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {'sXML': 1, 'sL': str(language), 'sK': keyword}
        is_episode = False
        if season and episode:
            is_episode = True
            params['sTS'] = season
            params['sTE'] = episode
        if year:
            params['sY'] = year

        # loop over paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []
        pids = set()
        while True:
            # query the server
            xml = etree.fromstring(
                self.session.get(self.server_url + 'search/old',
                                 params=params,
                                 timeout=10).content)

            # exit if no results
            if not int(xml.find('pagination/results').text):
                logger.debug('No subtitles found')
                break

            # loop over subtitles
            for subtitle_xml in xml.findall('subtitle'):
                # read xml elements
                language = Language.fromietf(
                    subtitle_xml.find('language').text)
                hearing_impaired = 'n' in (subtitle_xml.find('flags').text
                                           or '')
                page_link = subtitle_xml.find('url').text
                pid = subtitle_xml.find('pid').text
                releases = []
                if subtitle_xml.find('release').text:
                    for release in subtitle_xml.find('release').text.split():
                        release = re.sub(r'\.+$', '',
                                         release)  # remove trailing dots
                        release = ''.join(
                            filter(lambda x: ord(x) < 128,
                                   release))  # remove non-ascii characters
                        releases.append(release)
                title = subtitle_xml.find('title').text
                season = int(subtitle_xml.find('tvSeason').text)
                episode = int(subtitle_xml.find('tvEpisode').text)
                year = int(subtitle_xml.find('year').text)

                if is_episode:
                    subtitle = self.subtitle_class(language,
                                                   hearing_impaired,
                                                   page_link,
                                                   pid,
                                                   releases,
                                                   title,
                                                   season=season,
                                                   episode=episode,
                                                   year=year)
                else:
                    subtitle = self.subtitle_class(language,
                                                   hearing_impaired,
                                                   page_link,
                                                   pid,
                                                   releases,
                                                   title,
                                                   year=year)

                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in pids:
                    continue

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)
                pids.add(pid)

            # stop on last page
            if int(xml.find('pagination/current').text) >= int(
                    xml.find('pagination/count').text):
                break

            # increment current page
            params['page'] = int(xml.find('pagination/current').text) + 1
            logger.debug('Getting page %d', params['page'])

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            return [
                s for l in languages for s in self.query(l,
                                                         video.series,
                                                         season=video.season,
                                                         episode=video.episode,
                                                         year=video.year)
            ]
        elif isinstance(video, Movie):
            return [
                s for l in languages
                for s in self.query(l, video.title, year=video.year)
            ]

    def download_subtitle(self, subtitle):
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url + subtitle.pid + '/download',
                             params={'container': 'zip'},
                             timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            if len(zf.namelist()) > 1:
                raise ProviderError('More than one file to unzip')

            subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))
コード例 #55
0
ファイル: sessions.py プロジェクト: djmitche/txrequests
    def close(self):
        requestsSession.close(self)
        self.pool.stop()
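The fragment above overrides close() on a requests.Session subclass so the worker pool shuts down along with the HTTP session. A self-contained sketch of the same pattern; PooledSession and its pool argument are hypothetical stand-ins for txrequests' internals:

from requests import Session as requestsSession

class PooledSession(requestsSession):
    """Hypothetical stand-in illustrating the override in the fragment."""
    def __init__(self, pool):
        super(PooledSession, self).__init__()
        self.pool = pool                 # any object exposing a stop() method

    def close(self):
        requestsSession.close(self)      # release HTTP connections first
        self.pool.stop()                 # then stop the worker pool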
コード例 #56
0
class Vespa(object):
    def __init__(
        self,
        url: str,
        port: Optional[int] = None,
        deployment_message: Optional[List[str]] = None,
        cert: Optional[str] = None,
        output_file: IO = sys.stdout,
    ) -> None:
        """
        Establish a connection with a Vespa application.

        :param url: Vespa instance URL.
        :param port: Vespa instance port.
        :param deployment_message: Message returned by Vespa engine after deployment. Used internally by deploy methods.
        :param cert: Path to certificate and key file.
        :param output_file: Output file to write output messages.

        >>> Vespa(url = "https://cord19.vespa.ai")  # doctest: +SKIP

        >>> Vespa(url = "http://localhost", port = 8080)
        Vespa(http://localhost, 8080)

        >>> Vespa(url = "https://api.vespa-external.aws.oath.cloud", port = 4443, cert = "/path/to/cert-and-key.pem")  # doctest: +SKIP

        """
        self.output_file = output_file
        self.url = url
        self.port = port
        self.deployment_message = deployment_message
        self.cert = cert
        self.http_session = None

        if port is None:
            self.end_point = self.url
        else:
            self.end_point = str(url).rstrip("/") + ":" + str(port)
        self.search_end_point = self.end_point + "/search/"

        self._open_http_session()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def asyncio(self):
        return VespaAsync(self)

    def _open_http_session(self):
        if self.http_session is not None:
            return
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.http_session = Session()
        self.http_session.mount("https://", adapter)
        self.http_session.mount("http://", adapter)
        return self.http_session

    def _close_http_session(self):
        if self.http_session is None:
            return
        self.http_session.close()

    def close(self):
        self._close_http_session()

    def __repr__(self):
        if self.port:
            return "Vespa({}, {})".format(self.url, self.port)
        else:
            return "Vespa({})".format(self.url)

    def get_application_status(self) -> Optional[Response]:
        """
        Get application status.

        :return: Response of the HTTP GET request, or None on connection error.
        """
        end_point = "{}/ApplicationStatus".format(self.end_point)
        try:
            response = self.http_session.get(end_point, cert=self.cert)
        except ConnectionError:
            response = None
        return response

    def _build_query_body(self,
                          query: Optional[str] = None,
                          query_model: Optional[QueryModel] = None,
                          recall: Optional[Tuple] = None,
                          **kwargs) -> Dict:
        assert query is not None, "No 'query' specified."
        assert query_model is not None, "No 'query_model' specified."
        body = query_model.create_body(query=query)
        if recall is not None:
            body.update({
                "recall":
                "+(" + " ".join(
                    ["{}:{}".format(recall[0], str(doc))
                     for doc in recall[1]]) + ")"
            })
        body.update(kwargs)
        return body

    def query(self,
              body: Optional[Dict] = None,
              query: Optional[str] = None,
              query_model: Optional[QueryModel] = None,
              debug_request: bool = False,
              recall: Optional[Tuple] = None,
              **kwargs) -> VespaQueryResponse:
        """
        Send a query request to the Vespa application.

        Either send 'body' containing all the request parameters or specify 'query' and 'query_model'.

        :param body: Dict containing all the request parameters.
        :param query: Query string
        :param query_model: Query model
        :param debug_request: return request body for debugging instead of sending the request.
        :param recall: Tuple of size 2 where the first element is the name of the field to use to recall and the
            second element is a list of the values to be recalled.
        :param kwargs: Additional parameters to be sent along the request.
        :return: Either the request body if debug_request is True or the result from the Vespa application
        """
        body = (self._build_query_body(query, query_model, recall, **kwargs)
                if body is None else body)
        if debug_request:
            return VespaQueryResponse(json={},
                                      status_code=None,
                                      url=None,
                                      request_body=body)
        else:
            r = self.http_session.post(self.search_end_point,
                                       json=body,
                                       cert=self.cert)
        return VespaQueryResponse(json=r.json(),
                                  status_code=r.status_code,
                                  url=str(r.url))

    def feed_data_point(self, schema: str, data_id: str,
                        fields: Dict) -> VespaResponse:
        """
        Feed a data point to a Vespa app.

        :param schema: The schema that we are sending data to.
        :param data_id: Unique id associated with this data point.
        :param fields: Dict containing all the fields required by the `schema`.
        :return: Response of the HTTP POST request.
        """
        end_point = "{}/document/v1/{}/{}/docid/{}".format(
            self.end_point, schema, schema, str(data_id))
        vespa_format = {"fields": fields}
        response = self.http_session.post(end_point,
                                          json=vespa_format,
                                          cert=self.cert)
        return VespaResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
            operation_type="feed",
        )

    def _feed_batch_sync(self, schema: str, batch: List[Dict]):
        return [
            self.feed_data_point(schema, data_point["id"],
                                 data_point["fields"]) for data_point in batch
        ]

    async def _feed_batch_async(self, schema: str, batch: List[Dict]):
        async with VespaAsync(self) as async_app:
            return await async_app.feed_batch(schema=schema, batch=batch)

    def feed_batch(self, schema: str, batch: List[Dict], asynchronous=False):
        """
        Feed a batch of data to a Vespa app.

        :param schema: The schema that we are sending data to.
        :param batch: A list of dict containing the keys 'id' and 'fields' to be used in the :func:`feed_data_point`.
        :param asynchronous: Set True to send data in async mode. Default to False. Creates and executes the coroutine
            if there is no active running loop; otherwise returns the coroutine, which must be awaited.
        :return: List of HTTP POST responses
        """

        if asynchronous:
            try:
                _ = asyncio.get_running_loop()
                return self._feed_batch_async(schema=schema, batch=batch)
            except RuntimeError:
                return asyncio.run(
                    self._feed_batch_async(schema=schema, batch=batch))
        else:
            return self._feed_batch_sync(schema=schema, batch=batch)

    def delete_data(self, schema: str, data_id: str) -> VespaResponse:
        """
        Delete a data point from a Vespa app.

        :param schema: The schema that we are deleting data from.
        :param data_id: Unique id associated with this data point.
        :return: Response of the HTTP DELETE request.
        """
        end_point = "{}/document/v1/{}/{}/docid/{}".format(
            self.end_point, schema, schema, str(data_id))
        response = self.http_session.delete(end_point, cert=self.cert)
        return VespaResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
            operation_type="delete",
        )

    def delete_batch(self, batch: List):
        """
        Async delete a batch of data from a Vespa app.

        :param batch: A list of tuples with 'schema' and 'id'
        :return:
        """
        return [self.delete_data(schema, id) for schema, id in batch]

    def delete_all_docs(self, content_cluster_name: str,
                        schema: str) -> Response:
        """
        Delete all documents associated with the schema

        :param content_cluster_name: Name of content cluster to GET from, or visit.
        :param schema: The schema that we are deleting data from.
        :return: Response of the HTTP DELETE request.
        """
        end_point = "{}/document/v1/{}/{}/docid/?cluster={}&selection=true".format(
            self.end_point, schema, schema, content_cluster_name)
        response = self.http_session.delete(end_point, cert=self.cert)
        return response

    def get_data(self, schema: str, data_id: str) -> VespaResponse:
        """
        Get a data point from a Vespa app.

        :param schema: The schema that we are getting data from.
        :param data_id: Unique id associated with this data point.
        :return: Response of the HTTP GET request.
        """
        end_point = "{}/document/v1/{}/{}/docid/{}".format(
            self.end_point, schema, schema, str(data_id))
        response = self.http_session.get(end_point, cert=self.cert)
        return VespaResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
            operation_type="get",
        )

    def get_batch(self, batch: List):
        """
        Async get a batch of data from a Vespa app.

        :param batch: A list of tuples with 'schema' and 'id'.
        :return:
        """
        return [self.get_data(schema, id) for schema, id in batch]

    def update_data(self,
                    schema: str,
                    data_id: str,
                    fields: Dict,
                    create: bool = False) -> VespaResponse:
        """
        Update a data point in a Vespa app.

        :param schema: The schema that we are updating data.
        :param data_id: Unique id associated with this data point.
        :param fields: Dict containing all the fields you want to update.
        :param create: If true, updates to non-existent documents will create an empty document to update
        :return: Response of the HTTP PUT request.
        """
        end_point = "{}/document/v1/{}/{}/docid/{}?create={}".format(
            self.end_point, schema, schema, str(data_id),
            str(create).lower())
        vespa_format = {
            "fields": {k: {"assign": v} for k, v in fields.items()}
        }
        response = self.http_session.put(end_point,
                                         json=vespa_format,
                                         cert=self.cert)
        return VespaResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
            operation_type="update",
        )

    def update_batch(self, batch: List):
        """
        Update a batch of data points.

        :param batch: A list of tuples with 'schema', 'id', 'fields', and 'create'.
        :return: List of HTTP PUT responses.
        """
        return [
            self.update_data(schema, id, fields, create)
            for schema, id, fields, create in batch
        ]

    @staticmethod
    def annotate_data(hits, query_id, id_field, relevant_id, fields,
                      relevant_score, default_score):
        data = []
        for h in hits:
            record = {}
            record.update({"document_id": h["fields"][id_field]})
            record.update({"query_id": query_id})
            record.update({
                "label": relevant_score
                if h["fields"][id_field] == relevant_id else default_score
            })
            for field in fields:
                field_value = h["fields"].get(field, None)
                if field_value:
                    if isinstance(field_value, dict):
                        record.update(field_value)
                    else:
                        record.update({field: field_value})
            data.append(record)
        return data

    def collect_training_data_point(self,
                                    query: str,
                                    query_id: str,
                                    relevant_id: str,
                                    id_field: str,
                                    query_model: QueryModel,
                                    number_additional_docs: int,
                                    fields: List[str],
                                    relevant_score: int = 1,
                                    default_score: int = 0,
                                    **kwargs) -> List[Dict]:
        """
        Collect training data based on a single query

        :param query: Query string.
        :param query_id: Query id represented as str.
        :param relevant_id: Relevant id represented as a str.
        :param id_field: The Vespa field representing the document id.
        :param query_model: Query model.
        :param number_additional_docs: Number of additional documents to retrieve for each relevant document.
        :param fields: Which fields should be retrieved.
        :param relevant_score: Score to assign to relevant documents. Default to 1.
        :param default_score: Score to assign to the additional documents that are not relevant. Default to 0.
        :param kwargs: Extra keyword arguments to be included in the Vespa Query.
        :return: List of dicts containing the document id (document_id), query id (query_id), scores (relevant)
            and vespa rank features returned by the Query model RankProfile used.
        """

        relevant_id_result = self.query(query=query,
                                        query_model=query_model,
                                        recall=(id_field, [relevant_id]),
                                        **kwargs)
        hits = relevant_id_result.hits
        features = []
        if len(hits) == 1 and hits[0]["fields"][id_field] == relevant_id:
            if number_additional_docs > 0:
                random_hits_result = self.query(query=query,
                                                query_model=query_model,
                                                hits=number_additional_docs,
                                                **kwargs)
                hits.extend(random_hits_result.hits)

            features = self.annotate_data(
                hits=hits,
                query_id=query_id,
                id_field=id_field,
                relevant_id=relevant_id,
                fields=fields,
                relevant_score=relevant_score,
                default_score=default_score,
            )
        return features

    def collect_training_data(self,
                              labeled_data: List[Dict],
                              id_field: str,
                              query_model: QueryModel,
                              number_additional_docs: int,
                              relevant_score: int = 1,
                              default_score: int = 0,
                              show_progress: Optional[int] = None,
                              **kwargs) -> DataFrame:
        """
        Collect training data based on a set of labelled data.

        :param labeled_data: Labelled data containing query, query_id and relevant ids.
        :param id_field: The Vespa field representing the document id.
        :param query_model: Query model.
        :param number_additional_docs: Number of additional documents to retrieve for each relevant document.
        :param relevant_score: Score to assign to relevant documents. Default to 1.
        :param default_score: Score to assign to the additional documents that are not relevant. Default to 0.
        :param show_progress: Prints the current point being collected every `show_progress` step. Default to None,
            in which case progress is not printed.
        :param kwargs: Extra keyword arguments to be included in the Vespa Query.
        :return: DataFrame containing document id (document_id), query id (query_id), scores (relevant)
            and vespa rank features returned by the Query model RankProfile used.
        """

        training_data = []
        number_queries = len(labeled_data)
        idx_total = 0
        for query_idx, query_data in enumerate(labeled_data):
            number_relevant_docs = len(query_data["relevant_docs"])
            for doc_idx, doc_data in enumerate(query_data["relevant_docs"]):
                idx_total += 1
                if (show_progress is not None) and (idx_total % show_progress
                                                    == 0):
                    print(
                        "Query {}/{}, Doc {}/{}. Query id: {}. Doc id: {}".
                        format(
                            query_idx,
                            number_queries,
                            doc_idx,
                            number_relevant_docs,
                            query_data["query_id"],
                            doc_data["id"],
                        ),
                        file=self.output_file,
                    )
                training_data_point = self.collect_training_data_point(
                    query=query_data["query"],
                    query_id=query_data["query_id"],
                    relevant_id=doc_data["id"],
                    id_field=id_field,
                    query_model=query_model,
                    number_additional_docs=number_additional_docs,
                    relevant_score=doc_data.get("score", relevant_score),
                    default_score=default_score,
                    **kwargs)
                training_data.extend(training_data_point)
        training_data = DataFrame.from_records(training_data)
        return training_data

    def evaluate_query(self,
                       eval_metrics: List[EvalMetric],
                       query_model: QueryModel,
                       query_id: str,
                       query: str,
                       id_field: str,
                       relevant_docs: List[Dict],
                       default_score: int = 0,
                       detailed_metrics=False,
                       **kwargs) -> Dict:
        """
        Evaluate a query according to evaluation metrics

        :param eval_metrics: A list of evaluation metrics.
        :param query_model: Query model.
        :param query_id: Query id represented as str.
        :param query: Query string.
        :param id_field: The Vespa field representing the document id.
        :param relevant_docs: A list with dicts where each dict contains a doc id and optionally a doc score.
        :param default_score: Score to assign to the additional documents that are not relevant. Default to 0.
        :param detailed_metrics: Return intermediate computations if available.
        :param kwargs: Extra keyword arguments to be included in the Vespa Query.
        :return: Dict containing query_id and metrics according to the selected evaluation metrics.
        """

        query_results = self.query(query=query,
                                   query_model=query_model,
                                   **kwargs)
        evaluation = {"model": query_model.name, "query_id": query_id}
        for evaluator in eval_metrics:
            evaluation.update(
                evaluator.evaluate_query(
                    query_results,
                    relevant_docs,
                    id_field,
                    default_score,
                    detailed_metrics,
                ))
        return evaluation

    def evaluate(self,
                 labeled_data: Union[List[Dict], DataFrame],
                 eval_metrics: List[EvalMetric],
                 query_model: Union[QueryModel, List[QueryModel]],
                 id_field: str,
                 default_score: int = 0,
                 detailed_metrics=False,
                 per_query=False,
                 aggregators=None,
                 **kwargs) -> DataFrame:
        """
        Evaluate a :class:`QueryModel` according to a list of :class:`EvalMetric`.

        labeled_data can be a DataFrame or a List of Dict:

        >>> labeled_data_df = DataFrame(
        ...     data={
        ...         "qid": [0, 0, 1, 1],
        ...         "query": ["Intrauterine virus infections and congenital heart disease", "Intrauterine virus infections and congenital heart disease", "Clinical and immunologic studies in identical twins discordant for systemic lupus erythematosus", "Clinical and immunologic studies in identical twins discordant for systemic lupus erythematosus"],
        ...         "doc_id": [0, 3, 1, 5],
        ...         "relevance": [1,1,1,1]
        ...     }
        ... )

        >>> labeled_data = [
        ...     {
        ...         "query_id": 0,
        ...         "query": "Intrauterine virus infections and congenital heart disease",
        ...         "relevant_docs": [{"id": 0, "score": 1}, {"id": 3, "score": 1}]
        ...     },
        ...     {
        ...         "query_id": 1,
        ...         "query": "Clinical and immunologic studies in identical twins discordant for systemic lupus erythematosus",
        ...         "relevant_docs": [{"id": 1, "score": 1}, {"id": 5, "score": 1}]
        ...     }
        ... ]

        :param labeled_data: Labelled data containing query, query_id and relevant ids. See the examples above for accepted formats.
        :param eval_metrics: A list of evaluation metrics.
        :param query_model: Accept a Query model or a list of Query Models.
        :param id_field: The Vespa field representing the document id.
        :param default_score: Score to assign to the additional documents that are not relevant. Default to 0.
        :param detailed_metrics: Return intermediate computations if available.
        :param per_query: Set to True to return evaluation metrics per query.
        :param aggregators: Used only if `per_query=False`. List of pandas friendly aggregators to summarize per model
            metrics. We use ["mean", "median", "std"] by default.
        :param kwargs: Extra keyword arguments to be included in the Vespa Query.
        :return: DataFrame containing query_id and metrics according to the selected evaluation metrics.
        """
        if isinstance(labeled_data, DataFrame):
            labeled_data = parse_labeled_data(df=labeled_data)

        if isinstance(query_model, QueryModel):
            query_model = [query_model]

        model_names = [model.name for model in query_model]
        assert len(model_names) == len(set(
            model_names)), "Duplicate model names. Choose unique model names."

        evaluation = []
        for query_data in labeled_data:
            for model in query_model:
                evaluation_query = self.evaluate_query(
                    eval_metrics=eval_metrics,
                    query_model=model,
                    query_id=query_data["query_id"],
                    query=query_data["query"],
                    id_field=id_field,
                    relevant_docs=query_data["relevant_docs"],
                    default_score=default_score,
                    detailed_metrics=detailed_metrics,
                    **kwargs)
                evaluation.append(evaluation_query)
        evaluation = DataFrame.from_records(evaluation)
        if not per_query:
            if not aggregators:
                aggregators = ["mean", "median", "std"]
            evaluation = (evaluation[[
                x for x in evaluation.columns if x != "query_id"
            ]].groupby(by="model").agg(aggregators).T)
        return evaluation
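
Because __init__ opens the HTTP session and close() shuts it down, Vespa works naturally as a context manager. A brief usage sketch; the endpoint, schema and document fields are placeholders:

# Hedged usage sketch for the Vespa class above; the URL, schema name and
# fields are illustrative placeholders.
with Vespa(url="http://localhost", port=8080) as app:
    app.feed_data_point(schema="doc", data_id="1",
                        fields={"title": "hello"})
    response = app.query(body={"yql": "select * from sources * where userQuery();",
                               "query": "hello"})
    print(response.status_code)
# __exit__ calls close(), which closes the shared requests Session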
Code example #57
class DatabaseConnection(object):

    def __init__(self, user_or_apikey=None, user_password=None, url="https://connectordb.com"):

        # Set up the API URL
        if not url.startswith("http"):
            url = "https://" + url
        if not url.endswith("/"):
            url = url + "/"
        self.baseurl = url
        self.url = urljoin(url, "/api/v1/")

        # Set up a session, which allows us to reuse connections
        self.r = Session()
        self.r.headers.update({'content-type': 'application/json'})

        # Prepare the websocket
        self.ws = WebsocketHandler(self.url, None)

        # Set the authentication if any
        self.setauth(user_or_apikey, user_password)

        # Now set up the login path so we know what we're logged in as
        if user_password is not None:
            self.path = user_or_apikey + "/user"
        else:
            self.path = self.ping()

    def setauth(self, user_or_apikey=None, user_password=None):
        """ setauth sets the authentication header for use in the session.
        It is for use when apikey is updated or something of the sort, such that
        there is a seamless experience. """
        auth = None
        if user_or_apikey is not None:
            # ConnectorDB allows login using both basic auth or an apikey url param.
            # The python client uses basic auth for all logins
            if user_password is None:
                # Login by api key - the basic auth login uses "" user and
                # apikey as password
                user_password = user_or_apikey
                user_or_apikey = ""
            auth = HTTPBasicAuth(user_or_apikey, user_password)
            self.r.auth = auth

        # Set the websocket's authentication
        self.ws.setauth(auth)

    def close(self):
        """Closes the active connections to ConnectorDB"""
        self.r.close()

    def handleresult(self, r):
        """Handles HTTP error codes for the given request

        Raises:
            AuthenticationError on the appropriate 4** errors
            ServerError if the response is not an ok (2**)

        Arguments:
            r -- The request result
        """
        if r.status_code >= 400 and r.status_code < 500:
            msg = r.json()
            raise AuthenticationError(str(msg["code"]) + ": " + msg["msg"] +
                                      " (" + msg["ref"] + ")")
        elif r.status_code > 300:
            err = None
            try:
                msg = r.json()
                err = ServerError(str(msg["code"]) + ": " + msg["msg"] + " (" +
                                  msg["ref"] + ")")
            except Exception:
                raise ServerError(
                    "Server returned error, but did not give a valid error message")
            raise err
        return r

    def ping(self):
        """Attempts to ping the server using current credentials, and responds with the path of the currently
        authenticated device"""
        return self.handleresult(self.r.get(self.url,
                                            params={"q": "this"})).text

    def query(self, query_type, query=None):
        """Run the given query on the connection (POST request to /query)"""
        return self.handleresult(self.r.post(urljoin(self.url + "query/",
                                                     query_type),
                                             data=json.dumps(query))).json()

    def create(self, path, data=None):
        """Send a POST CRUD API request to the given path using the given data which will be converted
        to json"""
        return self.handleresult(self.r.post(urljoin(self.url + CRUD_PATH,
                                                     path),
                                             data=json.dumps(data)))

    def read(self, path, params=None):
        """Read the result at the given path (GET) from the CRUD API, using the optional params dictionary
        as url parameters."""
        return self.handleresult(self.r.get(urljoin(self.url + CRUD_PATH,
                                                    path),
                                            params=params))

    def update(self, path, data=None):
        """Send an update request to the given path of the CRUD API, with the given data dict, which will be converted
        into json"""
        return self.handleresult(self.r.put(urljoin(self.url + CRUD_PATH,
                                                    path),
                                            data=json.dumps(data)))

    def delete(self, path):
        """Send a delete request to the given path of the CRUD API. This deletes the object. Or at least tries to."""
        return self.handleresult(self.r.delete(urljoin(self.url + CRUD_PATH,
                                                       path)))

    def get(self, path, params=None):
        """Sends a get request to the given path in the database and with optional URL parameters"""
        return self.handleresult(self.r.get(urljoin(self.url, path),
                                            params=params))

    def subscribe(self, stream, callback, transform=""):
        """Subscribe to the given stream with the callback"""
        return self.ws.subscribe(stream, callback, transform)

    def unsubscribe(self, stream, transform=""):
        """Unsubscribe from the given stream"""
        return self.ws.unsubscribe(stream, transform)

    def wsdisconnect(self):
        """Disconnects the websocket"""
        self.ws.disconnect()
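
A brief usage sketch of DatabaseConnection; the API key and read path are placeholders:

# Hedged usage sketch; credentials and paths are made up for illustration.
db = DatabaseConnection("my-api-key")
try:
    print(db.ping())                 # path of the authenticated device
    r = db.read("myuser/mydevice")   # GET through the shared Session
    print(r.json())
finally:
    db.close()                       # closes the requests Session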
Code example #58
    def _load(self,
              offset=0,
              limit=10,
              resulttype='results',
              identifier=None,
              bbox=[],
              datetime_=None,
              properties=[],
              sortby=[],
              select_properties=[],
              skip_geometry=False,
              q=None):
        """
        Private function: Load STA data

        :param offset: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)
        :param q: full-text search term(s)

        :returns: dict of GeoJSON FeatureCollection
        """
        feature_collection = {'type': 'FeatureCollection', 'features': []}
        # Make params
        params = {
            '$expand': EXPAND[self.entity],
            '$skip': str(offset),
            '$top': str(limit),
            '$count': 'true'
        }
        if properties or bbox or datetime_:
            params['$filter'] = self._make_filter(properties, bbox, datetime_)
        if sortby:
            params['$orderby'] = self._make_orderby(sortby)

        # Start session
        s = Session()

        # Form URL for GET request
        LOGGER.debug('Sending query')
        if identifier:
            r = s.get(f'{self._url}({identifier})', params=params)
        else:
            r = s.get(self._url, params=params)

        if r.status_code == codes.bad:
            LOGGER.error('Bad http response code')
            raise ProviderConnectionError('Bad http response code')
        response = r.json()

        # if hits, return count
        if resulttype == 'hits':
            LOGGER.debug('Returning hits')
            feature_collection['numberMatched'] = response.get('@iot.count')
            return feature_collection

        # Query if values are less than expected
        v = [response] if identifier else response.get('value')
        hits_ = 1 if identifier else min(limit, response.get('@iot.count'))
        while len(v) < hits_:
            LOGGER.debug('Fetching next set of values')
            next_ = response.get('@iot.nextLink', None)
            if next_ is None:
                break
            else:
                with s.get(next_) as r:
                    response = r.json()
                    v.extend(response.get('value'))

        # End session
        s.close()

        # Properties filter & display
        keys = (() if not self.properties and not select_properties else
                set(self.properties) | set(select_properties))

        for entity in v[:hits_]:
            # Make feature
            id = entity.pop(self.id_field)
            id = f"'{id}'" if isinstance(id, str) else str(id)
            f = {
                'type': 'Feature',
                'properties': {},
                'geometry': None,
                'id': id
            }

            # Make geometry
            if not skip_geometry:
                f['geometry'] = self._geometry(entity)

            # Fill properties block
            try:
                f['properties'] = self._expand_properties(entity, keys)
            except KeyError as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            feature_collection['features'].append(f)

        feature_collection['numberReturned'] = len(
            feature_collection['features'])

        if identifier:
            return f
        else:
            return feature_collection
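
The heart of _load is the @iot.nextLink pagination loop: issue one GET, follow nextLink until enough values have accumulated, then close the Session. A standalone sketch of that loop under the same assumptions, with a placeholder URL and limit:

# Hedged sketch of the @iot.nextLink pagination loop used in _load above.
from requests import Session

def fetch_sta_values(url, params, limit=10):
    """Follow @iot.nextLink pages until `limit` values are collected."""
    s = Session()
    try:
        r = s.get(url, params=params)
        r.raise_for_status()
        response = r.json()
        values = response.get('value', [])
        while len(values) < limit:
            next_ = response.get('@iot.nextLink')
            if next_ is None:
                break
            response = s.get(next_).json()
            values.extend(response.get('value', []))
        return values[:limit]
    finally:
        s.close()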
Code example #59
File: tvsubtitles.py  Project: Indigo744/SickRage
class TVsubtitlesProvider(Provider):
    languages = {Language('por', 'BR')} | {Language(l) for l in [
        'ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra', 'hun', 'ita', 'jpn', 'kor', 'nld', 'pol', 'por',
        'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho'
    ]}
    video_types = (Episode,)
    server_url = 'http://www.tvsubtitles.net/'

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__

    def terminate(self):
        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int or None
        :return: the show id, if any.
        :rtype: int or None

        """
        # make the search
        logger.info('Searching show id for %r', series)
        r = self.session.post(self.server_url + 'search.php', data={'q': series}, timeout=10)
        r.raise_for_status()

        # get the series out of the suggestions
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        show_id = None
        for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'):
            match = link_re.match(suggestion.text)
            if not match:
                logger.error('Failed to match %s', suggestion.text)
                continue

            if match.group('series').lower() == series.lower():
                if year is not None and int(match.group('first_year')) != year:
                    logger.debug('Year does not match')
                    continue
                show_id = int(suggestion['href'][8:-5])
                logger.debug('Found show id %d', show_id)
                break

        return show_id

    @region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME)
    def get_episode_ids(self, show_id, season):
        """Get episode ids from the show id and the season.

        :param int show_id: show id.
        :param int season: season of the episode.
        :return: episode ids per episode number.
        :rtype: dict

        """
        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id, season)
        r = self.session.get(self.server_url + 'tvshow-%d-%d.html' % (show_id, season), timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over episode rows
        episode_ids = {}
        for row in soup.select('table#table5 tr'):
            # skip rows that do not have a link to the episode page
            if not row('a', href=episode_id_re):
                continue

            # extract data from the cells
            cells = row('td')
            episode = int(cells[0].text.split('x')[1])
            episode_id = int(cells[1].a['href'][8:-5])
            episode_ids[episode] = episode_id

        if episode_ids:
            logger.debug('Found episode ids %r', episode_ids)
        else:
            logger.warning('No episode ids found')

        return episode_ids

    def query(self, series, season, episode, year=None):
        # search the show id
        show_id = self.search_show_id(series, year)
        if show_id is None:
            logger.error('No show id found for %r (%r)', series, {'year': year})
            return []

        # get the episode ids
        episode_ids = self.get_episode_ids(show_id, season)
        if episode not in episode_ids:
            logger.error('Episode %d not found', episode)
            return []

        # get the episode page
        logger.info('Getting the page for episode %d', episode_ids[episode])
        r = self.session.get(self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # loop over subtitles rows
        subtitles = []
        for row in soup.select('.subtitlen'):
            # read the item
            language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
            subtitle_id = int(row.parent['href'][10:-5])
            page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
            rip = row.find('p', title='rip').text.strip() or None
            release = row.find('p', title='release').text.strip() or None

            subtitle = TVsubtitlesSubtitle(language, page_link, subtitle_id, series, season, episode, year, rip,
                                           release)
            logger.debug('Found subtitle %s', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        return [s for s in self.query(video.series, video.season, video.episode, video.year) if s.language in languages]

    def download_subtitle(self, subtitle):
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url + 'download-%d.html' % subtitle.subtitle_id, timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            if len(zf.namelist()) > 1:
                raise ProviderError('More than one file to unzip')

            subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))
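
A short usage sketch of the provider above; the series, season and episode values are placeholders:

# Hedged usage sketch for TVsubtitlesProvider; inputs are illustrative.
provider = TVsubtitlesProvider()
provider.initialize()                       # opens the requests Session
try:
    subs = provider.query('The Big Bang Theory', 7, 5, year=2007)
    if subs:
        provider.download_subtitle(subs[0])  # fills subtitle.content
finally:
    provider.terminate()                    # closes the Session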
Code example #60
File: shooter.py  Project: t4lwh/subliminal
class ShooterProvider(Provider):
    languages = {Language.fromalpha2(l) for l in ["zh"]}
    required_hash = "shooter"

    def initialize(self):
        self.session = Session()
        self.session.headers = {"User-Agent": "SPlayer Build 2437"}

    def terminate(self):
        self.session.close()

    def query(self, hash):
        # shooter has many DNS mirrors, e.g. splayer[1-9], but one is enough
        params = {"pathinfo": "temp", "format": "json", "filehash": hash}
        logger.info("Searching subtitles %r", params)

        r = self.session.get("https://www.shooter.cn/api/subapi.php", params=params, timeout=10)
        r.raise_for_status()

        # loop over, server always returns found or not
        subtitles = []
        try:
            for it in r.json():
                # It normally contains one File, but can contain multiple
                link = it["Files"][0]["Link"]
                subtype = it["Files"][0]["Ext"]
                subtitle = ShooterSubtitle(Language.fromalpha2("zh"), hash, link, subtype)
                logger.debug("Found subtitle %r", subtitle)

                subtitles.append(subtitle)

            return subtitles
        except Exception:
            logger.debug("No subtitle found")
            return []

    def list_subtitles(self, video, languages):
        return [s for s in self.query(video.hashes["shooter"]) if s.language in languages]

    def download_subtitle(self, subtitle):
        logger.info("Download subtitle %r", subtitle.link)
        r = self.session.get(subtitle.link, params=None, timeout=10)
        r.raise_for_status()

        subtitle.content = fix_line_ending(r.content)
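
A matching sketch for ShooterProvider, which looks subtitles up by file hash; the hash value is a placeholder:

# Hedged usage sketch; the shooter file hash below is a made-up placeholder.
provider = ShooterProvider()
provider.initialize()                   # Session with SPlayer User-Agent
try:
    subs = provider.query('0123456789abcdef0123456789abcdef')
    for sub in subs:
        provider.download_subtitle(sub)  # sets sub.content
finally:
    provider.terminate()                # closes the Session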