Beispiel #1
0
def _scrape_vimeo_callback(info, callback):
    """Turn a scraped vimeo.com XML response into a playable video URL.

    ``info['body']`` is parsed as XML and ``info['redirected-url']`` is
    matched against ``VIMEO_CLIP_RE`` to get the clip id.  The play URL is
    assembled from the clip id and the ``request_signature`` /
    ``request_signature_expires`` elements.  The HD variant is then probed
    with ``httpclient.grab_headers``: ``callback`` is handed the HD URL
    from the first handler and the SD URL from the second one.

    If a ``StandardError`` is raised anywhere along the way the failure is
    logged and ``callback(None)`` is called.
    """
    url = info['redirected-url']

    def tag_text(document, tag_name):
        # Data of the first child of the first <tag_name> element.
        element = document.getElementsByTagName(tag_name).item(0)
        return element.firstChild.data.decode('ascii', 'replace')

    try:
        document = minidom.parseString(info['body'])
        clip_id = VIMEO_CLIP_RE.match(url).group(1)
        signature = tag_text(document, 'request_signature')
        expires = tag_text(document, 'request_signature_expires')
        # ``url`` is rebound on purpose: the except clause below logs
        # whatever value it holds at the moment of failure.
        url = (u"http://www.vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=" %
               (clip_id, signature, expires))
        hd_url = url + 'hd'
        sd_url = url + 'sd'

        def use_hd(_result):
            return callback(hd_url)

        def use_sd(_error):
            return callback(sd_url)

        httpclient.grab_headers(hd_url, use_hd, use_sd)
    except StandardError:
        logging.exception("Unable to scrape XML for vimeo.com video URL: %s", url)
        callback(None)
Beispiel #2
0
def _scrape_vimeo_callback(info, callback):
    """Derive the vimeo.com play URL from a scraped XML document.

    The clip id comes from the ``id_`` group of ``VIMEO_CLIP_RE`` matched
    against ``info['redirected-url']``; the request signature and its
    expiry come from the XML in ``info['body']``.  The HD play URL is
    probed with ``httpclient.grab_headers``; ``callback`` gets the HD URL
    from the first handler and the SD URL from the second.

    Any ``StandardError`` is logged and reported as ``callback(None)``.
    """
    url = info['redirected-url']
    try:
        xml_doc = minidom.parseString(info['body'])
        clip_id = VIMEO_CLIP_RE.match(url).group('id_')

        sig_node = xml_doc.getElementsByTagName('request_signature').item(0)
        sig = sig_node.firstChild.data.decode('ascii', 'replace')

        expiry_node = xml_doc.getElementsByTagName(
            'request_signature_expires').item(0)
        sig_expiry = expiry_node.firstChild.data.decode('ascii', 'replace')

        # Rebinding ``url`` is intentional: the handler below logs the
        # value current at the time of the failure.
        url = (u"http://www.vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=" %
               (clip_id, sig, sig_expiry))
        hd_url = url + 'hd'
        sd_url = url + 'sd'

        def deliver_hd(_result):
            return callback(hd_url)

        def deliver_sd(_error):
            return callback(sd_url)

        httpclient.grab_headers(hd_url, deliver_hd, deliver_sd)
    except StandardError:
        logging.exception("Unable to scrape XML for vimeo.com video URL: %s",
                          url)
        callback(None)
Beispiel #3
0
 def grab_headers(self, url, *args, **kwargs):
     """Start a headers request for ``url`` and run the event loop.

     Resets ``grab_url_error`` and ``grab_url_info`` to ``None``, calls
     ``httpclient.grab_headers`` with this object's ``grab_url_callback``
     and ``grab_url_errback`` (extra positional and keyword arguments are
     forwarded), keeps the returned value in ``self.client`` and finally
     runs the event loop with ``self.event_loop_timeout`` as timeout.
     """
     self.grab_url_error = None
     self.grab_url_info = None
     self.client = httpclient.grab_headers(
         url,
         self.grab_url_callback,
         self.grab_url_errback,
         *args,
         **kwargs)
     self.runEventLoop(timeout=self.event_loop_timeout)
Beispiel #4
0
 def get_content_type(self):
     """Determine the content type for ``self.url``.

     Magnet URIs get ``self.content_type`` set directly; for everything
     else a headers request is started whose outcome is delivered to
     ``on_content_type`` or ``on_content_type_error``.
     """
     if not is_magnet_uri(self.url):
         httpclient.grab_headers(self.url, self.on_content_type,
                                 self.on_content_type_error)
         return
     self.content_type = u'application/x-magnet'
Beispiel #5
0
 def callback(dialog):
     """Dialog handler: issue the headers request again on Retry."""
     retry_chosen = dialog.choice == dialogs.BUTTON_RETRY
     if not retry_chosen:
         return
     # NOTE(review): ``callback`` below resolves to this very function
     # unless the enclosing scope rebinds the name -- confirm that the
     # headers callback (not this dialog handler) is what gets passed.
     httpclient.grab_headers(url, callback, errback)
Beispiel #6
0
def add_download(url, handle_unknown_callback=None, metadata=None):
    """Given a url, this tries to figure out what it is (video, audio,
    torrent, rss feed, flash file that Miro can scrape) and handles it
    accordingly.

    If it can't figure out what it is, then it calls
    ``handle_unknown_callback`` with the url of the thing it can't
    identify and thus doesn't know what to do with.

    :param url: The url to download.  A ``feed:`` / ``feeds:`` prefix is
        rewritten to ``http:`` / ``https:`` before anything else happens.

    :param handle_unknown_callback: The function to call if Miro can't
        figure out what kind of thing is at the url.  If this is None,
        then it uses the default ``download_unknown_mime_type``
        handler.

    :param metadata: dict holding additional metadata like title,
        description, ...  If it carries a ``mime_type`` entry, that value
        is used as the content type and no headers request is made.
    """
    if handle_unknown_callback is None:
        handle_unknown_callback = download_unknown_mime_type

    if url.startswith('feed:') or url.startswith('feeds:'):
        # hack so feed(s): acts as http(s):
        url = "http" + url[4:]

    if check_url_exists(url):
        return

    def errback(error):
        """Report a failed request; offer a retry for errors that are
        instances of ``httpclient.PossiblyTemporaryError``.
        """
        title = _("Download Error")
        text = _(
            "%(appname)s is not able to download a file at this URL:\n"
            "\n"
            "URL: %(url)s\n"
            "\n"
            "Error: %(error)s (%(errordesc)s)", {
                "url": url,
                "appname": app.config.get(prefs.SHORT_APP_NAME),
                "error": error.getFriendlyDescription(),
                "errordesc": error.getLongDescription()
            })
        logging.info("can't download '%s'", url)
        if not isinstance(error, httpclient.PossiblyTemporaryError):
            dialogs.MessageBoxDialog(title, text).run()
            return

        def retry_callback(dialog):
            # This handler must not be called ``callback``: a local of
            # that name would shadow the headers callback defined further
            # down, and the retry would then hand the dialog handler to
            # grab_headers as its success callback.
            if dialog.choice == dialogs.BUTTON_RETRY:
                httpclient.grab_headers(url, callback, errback)

        dialogs.ChoiceDialog(title, text, dialogs.BUTTON_RETRY,
                             dialogs.BUTTON_CANCEL).run(retry_callback)

    def callback_peek(data):
        """Takes the data returned from a GET and peeks at it to see
        if it's a feed despite the fact that it has the wrong
        content-type.
        """
        if data["body"]:
            if filetypes.is_maybe_rss(data["body"]):
                # FIXME - this is silly since we just did a GET and we
                # do another one in add_feeds
                logging.info("%s is a feed--adding it.", url)
                add_feeds([url])
                return

        handle_unknown_callback(url)

    def callback(headers, content_type=None):
        """We need to figure out if the URL is a external video link,
        or a link to a feed.

        ``content_type`` overrides the ``content-type`` header; when it
        is given, ``headers`` is not consulted and may be None.
        """
        logging.debug("callback for %s %s %s", url, headers, content_type)
        if check_url_exists(url):
            return

        if content_type is None:
            content_type = headers.get("content-type")

        if content_type:
            if filetypes.is_feed_content_type(content_type):
                add_feeds([url])
                return

            if flashscraper.is_maybe_flashscrapable(url):
                entry = _build_entry(url, 'video/x-flv', additional=metadata)
                download_video(entry)
                return

            if amazon.is_amazon_content_type(content_type):
                amazon.download_file(url, handle_unknown_callback)
                return

            if filetypes.is_maybe_feed_content_type(content_type):
                logging.info(
                    "%s content type is %s.  "
                    "going to peek to see if it's a feed....", url,
                    content_type)
                httpclient.grab_url(url, callback_peek, errback)
                return

        # NOTE(review): unlike the flash-scraper branch above, metadata is
        # not forwarded to _build_entry here -- confirm that is intended.
        entry = _build_entry(url, content_type)

        if filetypes.is_video_enclosure(entry['enclosures'][0]):
            download_video(entry)
        else:
            handle_unknown_callback(url)

    if metadata and metadata.get('mime_type'):
        # we've already got the mime type, don't do another call
        callback(None, metadata['mime_type'])
    elif is_magnet_uri(url):
        callback(None, 'application/x-magnet')
    elif amazon.is_amazon_url(url):
        amazon.download_file(url, handle_unknown_callback)
    elif emusic.is_emusic_url(url):
        emusic.download_file(url, handle_unknown_callback)
    else:
        httpclient.grab_headers(url, callback, errback)
Beispiel #7
0
 def get_content_type(self):
     """Work out the content type of ``self.url``.

     A magnet URI is assigned ``u'application/x-magnet'`` right away;
     any other url triggers a headers request reporting to
     ``on_content_type`` / ``on_content_type_error``.
     """
     target = self.url
     if is_magnet_uri(target):
         self.content_type = u'application/x-magnet'
     else:
         httpclient.grab_headers(self.url, self.on_content_type,
                                 self.on_content_type_error)
Beispiel #8
0
def _scrape_break_video_url(url, callback):
    """Request the headers of ``url`` for scraping.

    The first handler forwards its argument, together with ``callback``,
    to ``_scrape_break_callback``; the second does the same with
    ``_scrape_break_errback``.
    """
    def on_headers(x):
        return _scrape_break_callback(x, callback)

    def on_error(x):
        return _scrape_break_errback(x, callback)

    httpclient.grab_headers(url, on_headers, on_error)
Beispiel #9
0
 def get_content_type(self):
     """Start a headers request for ``self.url``.

     The outcome is delivered to ``on_content_type`` or
     ``on_content_type_error``.
     """
     httpclient.grab_headers(
         self.url,
         self.on_content_type,
         self.on_content_type_error)
Beispiel #10
0
def _scrape_break_video_url(url, callback):
    """Fetch the headers of ``url`` and route the outcome onwards.

    Both handlers pass what they receive plus ``callback`` along: the
    first to ``_scrape_break_callback``, the second to
    ``_scrape_break_errback``.
    """
    def handle_success(x):
        return _scrape_break_callback(x, callback)

    def handle_failure(x):
        return _scrape_break_errback(x, callback)

    httpclient.grab_headers(url, handle_success, handle_failure)
Beispiel #11
0
 def callback(dialog):
     """Dialog handler: repeat the headers request when Retry is chosen."""
     if not (dialog.choice == dialogs.BUTTON_RETRY):
         return
     # NOTE(review): within this scope ``callback`` names this function
     # itself unless it is rebound elsewhere -- verify that passing it as
     # the headers callback is really intended.
     httpclient.grab_headers(url, callback, errback)
Beispiel #12
0
def add_download(url, handle_unknown_callback=None, metadata=None):
    """Given a url, this tries to figure out what it is (video, audio,
    torrent, rss feed, flash file that Miro can scrape) and handles it
    accordingly.

    If it can't figure out what it is, then it calls
    ``handle_unknown_callback`` with the url of the thing it can't
    identify and thus doesn't know what to do with.

    :param url: The url to download.  A ``feed:`` / ``feeds:`` prefix is
        rewritten to ``http:`` / ``https:`` before anything else happens.

    :param handle_unknown_callback: The function to call if Miro can't
        figure out what kind of thing is at the url.  If this is None,
        then it uses the default ``download_unknown_mime_type``
        handler.

    :param metadata: dict holding additional metadata like title,
        description, ...
    """
    if handle_unknown_callback is None:
        handle_unknown_callback = download_unknown_mime_type

    if url.startswith("feed:") or url.startswith("feeds:"):
        # hack so feed(s): acts as http(s):
        url = "http" + url[4:]

    if check_url_exists(url):
        return

    def errback(error):
        """Report a failed request; offer a retry for errors that are
        instances of ``httpclient.PossiblyTemporaryError``.
        """
        title = _("Download Error")
        text = _(
            "%(appname)s is not able to download a file at this URL:\n"
            "\n"
            "URL: %(url)s\n"
            "\n"
            "Error: %(error)s (%(errordesc)s)",
            {
                "url": url,
                "appname": app.config.get(prefs.SHORT_APP_NAME),
                "error": error.getFriendlyDescription(),
                "errordesc": error.getLongDescription(),
            },
        )
        logging.info("can't download '%s'", url)
        if not isinstance(error, httpclient.PossiblyTemporaryError):
            dialogs.MessageBoxDialog(title, text).run()
            return

        def retry_callback(dialog):
            # This handler must not be called ``callback``: a local of
            # that name would shadow the headers callback defined further
            # down, and the retry would then hand the dialog handler to
            # grab_headers as its success callback.
            if dialog.choice == dialogs.BUTTON_RETRY:
                httpclient.grab_headers(url, callback, errback)

        dialogs.ChoiceDialog(title, text, dialogs.BUTTON_RETRY, dialogs.BUTTON_CANCEL).run(retry_callback)

    def callback_peek(data):
        """Takes the data returned from a GET and peeks at it to see
        if it's a feed despite the fact that it has the wrong
        content-type.
        """
        if data["body"]:
            if filetypes.is_maybe_rss(data["body"]):
                # FIXME - this is silly since we just did a GET and we
                # do another one in add_feeds
                logging.info("%s is a feed--adding it.", url)
                add_feeds([url])
                return

        handle_unknown_callback(url)

    def callback(headers):
        """We need to figure out if the URL is a external video link,
        or a link to a feed.
        """
        if check_url_exists(url):
            return

        content_type = headers.get("content-type")
        if content_type:
            if filetypes.is_feed_content_type(content_type):
                add_feeds([url])
                return

            if flashscraper.is_maybe_flashscrapable(url):
                entry = _build_entry(url, "video/x-flv", additional=metadata)
                download_video(entry)
                return

            if filetypes.is_maybe_feed_content_type(content_type):
                logging.info("%s content type is %s.  " "going to peek to see if it's a feed....", url, content_type)
                httpclient.grab_url(url, callback_peek, errback)
                return

        # NOTE(review): unlike the flash-scraper branch above, metadata is
        # not forwarded to _build_entry here -- confirm that is intended.
        entry = _build_entry(url, content_type)

        if filetypes.is_video_enclosure(entry["enclosures"][0]):
            download_video(entry)
        else:
            handle_unknown_callback(url)

    httpclient.grab_headers(url, callback, errback)