Beispiel #1
0
 def open(self, fullurl, data=None, method=None):
     """Use URLopener().open(file) instead of open(file, 'r').

     Dispatches *fullurl* to a scheme-specific ``open_<scheme>`` handler,
     serving cached responses from ``self.tempcache`` first.  Falls back
     to ``open_unknown``/``open_unknown_proxy`` when no handler exists.
     Python 2 code (``except ..., msg`` and three-argument ``raise``).
     """
     # Strip an optional '<URL:...>' wrapper and coerce unicode to bytes.
     fullurl = unwrap(toBytes(fullurl))
     # percent encode url, fixing lame server errors for e.g, like space
     # within url paths.
     fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
     if self.tempcache and fullurl in self.tempcache:
         # Cache hit: replay the previously retrieved file with its headers.
         filename, headers = self.tempcache[fullurl]
         fp = open(filename, 'rb')  # the builtin open(), not this method
         return addinfourl(fp, headers, fullurl)
     urltype, url = splittype(fullurl)
     if not urltype:
         # No scheme given -- treat the URL as a local file path.
         urltype = 'file'
     if urltype in self.proxies:
         # Route through the configured proxy for this scheme.
         proxy = self.proxies[urltype]
         urltype, proxyhost = splittype(proxy)
         host, selector = splithost(proxyhost)
         url = (host, fullurl) # Signal special case to open_*()
     else:
         proxy = None
     # Resolve the handler name, e.g. 'open_http'; dashes are not legal in
     # Python identifiers, so map them to underscores.
     name = 'open_' + urltype
     self.type = urltype
     name = name.replace('-', '_')
     if not hasattr(self, name):
         if proxy:
             return self.open_unknown_proxy(proxy, fullurl, data)
         else:
             return self.open_unknown(fullurl, data)
     try:
         return getattr(self, name)(url, data, method)
     except socket.error, msg:
         # Python 2 three-argument raise: keep the original traceback.
         raise IOError, ('socket error', msg), sys.exc_info()[2]
Beispiel #2
0
def retrieve(self,
             url,
             filename=None,
             reporthook=None,
             data=None,
             maxtries=5,
             r_range=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.
    If it fails, it relaunches itself until the dl is complete or
    maxtries == 0 (maxtries == -1 for unlimited tries).
    Range tuple(start, end) indicates the range of the remote object
    we have to retrieve (ignored for local files)"""

    # -1 means "retry forever"; anything smaller is a caller error.
    if maxtries < -1:
        raise ValueError, 'maxtries must be at least equal with -1'

    # Strip an optional '<URL:...>' wrapper and coerce unicode to bytes.
    url = unwrap(toBytes(url))

    if self.tempcache and url in self.tempcache:
        # Cache hit: reuse the (filename, headers) pair from a prior call.
        return self.tempcache[url]

    type, url1 = splittype(url)

    # Local fast path: no scheme (or file:) and no explicit target means
    # we can answer straight from the filesystem without downloading.
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()

            return url2pathname(splithost(url1)[1]), hdrs
        except IOError, msg:
            # Not a readable local file -- fall through.
            # NOTE(review): the remote-download path is not part of this
            # excerpt; for non-local URLs the function falls off the end.
            pass
Beispiel #3
0
def retrieve(self, url, filename=None, reporthook=None, data=None,
             maxtries=5, r_range=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.
    If it fails, it relaunches itself until the dl is complete or
    maxtries == 0 (maxtries == -1 for unlimited tries).
    Range tuple(start, end) indicates the range of the remote object
    we have to retrieve (ignored for local files)"""
 
    # -1 means "retry forever"; anything smaller is a caller error.
    if maxtries < -1:
        raise ValueError, 'maxtries must be at least equal with -1'
 
    # Strip an optional '<URL:...>' wrapper and coerce unicode to bytes.
    url = unwrap(toBytes(url))
 
    if self.tempcache and url in self.tempcache:
        # Cache hit: reuse the (filename, headers) pair from a prior call.
        return self.tempcache[url]
 
    type, url1 = splittype(url)
 
    # Local fast path: no scheme (or file:) and no explicit target means
    # we can answer straight from the filesystem without downloading.
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()
 
            return url2pathname(splithost(url1)[1]), hdrs
        except IOError, msg:
            # Not a readable local file -- fall through.
            # NOTE(review): the remote-download path is not part of this
            # excerpt; for non-local URLs the function falls off the end.
            pass
Beispiel #4
0
 def __init__(self, url, data=None, headers={}):
     """Initialise the request from *url*.

     Strips an optional '<URL:...>' wrapper, resets the parsed-URL
     fields, and copies *headers* into a fresh dict.
     """
     # unwrap('<URL:type://host/path>') --> 'type://host/path'
     self.__original = unwrap(url)
     # Parsed URL components; populated later (after splittype etc.).
     self.type = None
     self.host = None
     self.port = None
     self.data = data
     # Copy, so the caller's mapping (and the mutable default) are never
     # shared with this instance.
     self.headers = dict(headers)
 def __init__(self, url, data=None, headers={}):
     """Set up request state for *url* and register the initial headers."""
     # unwrap('<URL:type://host/path>') --> 'type://host/path'
     self.__original = unwrap(url)
     # Components produced later by URL parsing.
     self.type = None
     self.host = None
     self.port = None
     self.data = data
     self.headers = {}
     # Route every header through add_header() so its handling applies.
     for hdr_name, hdr_value in headers.items():
         self.add_header(hdr_name, hdr_value)
Beispiel #6
0
    def retrieve_resume(self, url, filename, reporthook=None, data=None):
        """retrieve_resume(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.
        The filename argument is REQUIRED (no tempfile creation code here!)
        Additionally resumes a download if the local filename exists.

        Raises urllib.ContentTooShortError when fewer bytes arrive than
        the Content-Length header announced.
        """

        current_size = 0
        tfp = None
        if os.path.exists(filename):
            try:
                current_size = os.path.getsize(filename)
                tfp = open(filename, 'ab')
                # If the file exists and is non-empty, only download the
                # remainder.  Guard added: requesting 'bytes=0-' for an
                # empty file forces a pointless 206 partial response.
                if current_size > 0:
                    self.addheader('Range', 'bytes=%s-' % (current_size))
            except (IOError, OSError):
                # Narrowed from a bare 'except:' (which also swallowed
                # KeyboardInterrupt/SystemExit).  Resume stays best-effort:
                # log and fall back to a full download.
                log('Cannot open file for resuming: %s', filename, sender=self, traceback=True)
                tfp = None
                current_size = 0

        if tfp is None:
            # No resumable file -- start from scratch.
            tfp = open(filename, 'wb')

        # Strip an optional '<URL:...>' wrapper and coerce unicode to bytes.
        url = urllib.unwrap(urllib.toBytes(url))
        fp = self.open(url, data)
        headers = fp.info()
        result = filename, headers
        bs = 1024*8
        size = -1
        # Progress accounting continues from the already-downloaded prefix.
        read = current_size
        blocknum = int(current_size/bs)
        if reporthook:
            # NOTE(review): size is only learned when a reporthook is given;
            # without one, the short-read check below can never fire.
            if "content-length" in headers:
                size = int(headers["Content-Length"]) + current_size
            reporthook(blocknum, bs, size)
        # Copy blocks until EOF.
        while 1:
            block = fp.read(bs)
            if block == "":
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result
Beispiel #7
0
 def __init__(self, url, data=None, headers={}):
     """Prepare request state: unwrap the URL and install *headers*."""
     # unwrap('<URL:type://host/path>') --> 'type://host/path'
     self.__original = unwrap(url)
     # Filled in later once the URL is actually parsed.
     self.type = None
     self.host = None
     self.port = None
     self.data = data
     self.headers = {}
     # add_header() is the single entry point for header registration.
     for key_name, key_value in headers.items():
         self.add_header(key_name, key_value)
Beispiel #8
0
 def begin_retrieve(self, url, filename=None, reporthook=None):
     """Start retrieving *url*; for local files, answer immediately with
     (pathname, headers).  Python 2 code (``except IOError, msg``).

     NOTE(review): the remote-download continuation is not part of this
     excerpt -- for non-local URLs the function falls off the end.
     """
     # unwrap('<URL:type://host/path>') --> 'type://host/path'
     url = unwrap(url)
     # Presumably clears prior retrieval state for this URL -- see
     # __clean_retrieve (definition not in this excerpt).
     self.__clean_retrieve(url)
     type, url1 = splittype(url)
     if not filename and (not type or type == 'file'):
         # Schemeless or file: URL with no target filename -- serve the
         # local file directly.
         try:
             fp = self.open_local_file(url1)
             hdrs = fp.info()
             del fp
             return url2pathname(splithost(url1)[1]), hdrs
         except IOError, msg:
             # Not an accessible local file; continue with normal handling.
             pass
Beispiel #9
0
 def retrieve(self, url, filename=None, reporthook=None, blocksize=262144):
     """retrieve(url) returns (filename, headers) for a local object or
     (tempfilename, headers) for a remote object.  Python 2 code
     (``dict.has_key``, ``except IOError, msg``).
     """
     # unwrap('<URL:type://host/path>') --> 'type://host/path'
     url = urllib.unwrap(url)
     if self.tempcache and self.tempcache.has_key(url):
         # Cache hit: reuse the result of a previous retrieval.
         return self.tempcache[url]
     type, url1 = urllib.splittype(url)
     if not filename and (not type or type == 'file'):
         # Schemeless or file: URL with no explicit target -- answer from
         # the local filesystem without downloading.
         try:
             fp = self.open_local_file(url1)
             hdrs = fp.info()
             del fp
             return url2pathname(urllib.splithost(url1)[1]), hdrs
         except IOError, msg:
             # Not a readable local file; fall through.
             # NOTE(review): the remote-download path is not in this excerpt.
             pass
Beispiel #10
0
 def retrieve(self, url, filename=None, reporthook=None, blocksize=262144):
     """retrieve(url) returns (filename, headers) for a local object or
     (tempfilename, headers) for a remote object.  Python 2 code
     (``dict.has_key``, ``except IOError, msg``).
     """
     # unwrap('<URL:type://host/path>') --> 'type://host/path'
     url = urllib.unwrap(url)
     if self.tempcache and self.tempcache.has_key(url):
         # Cache hit: reuse the result of a previous retrieval.
         return self.tempcache[url]
     type, url1 = urllib.splittype(url)
     if not filename and (not type or type == 'file'):
         # Schemeless or file: URL with no explicit target -- answer from
         # the local filesystem without downloading.
         try:
             fp = self.open_local_file(url1)
             hdrs = fp.info()
             del fp
             return url2pathname(urllib.splithost(url1)[1]), hdrs
         except IOError, msg:
             # Not a readable local file; fall through.
             # NOTE(review): the remote-download path is not in this excerpt.
             pass
Beispiel #11
0
def lambda_handler(event, context):
    """AWS Lambda entry point: parse an interactive-message payload.

    Expects event['body-json'] to hold a form-encoded body whose
    'payload' field is a JSON document.  Python 2 code (print
    statements, ``except Exception, e``).
    """
    try:
        body_dict = event['body-json']
        # unwrap strips an optional '<URL:...>' wrapper from the raw body.
        payload = str(urllib.unwrap(body_dict))
        # Form-encoded body: parse_qs yields {field: [values]}.
        payload = urlparse.parse_qs(payload)
        payload_dict = payload['payload'][0]

        json_dict = json.loads(payload_dict)

    except Exception, e:
        # Log the offending event for debugging, then surface a generic
        # client error to the caller.
        print event
        print e
        print 'event parsing error'
        raise Exception("Bad Request: request failed")
Beispiel #12
0
 def retrieve(self, url, filename=None, reporthook=None, data=None):
     """retrieve(url) returns (filename, headers) for a local object
     or (tempfilename, headers) for a remote object."""
     # Strip an optional '<URL:...>' wrapper and coerce unicode to bytes.
     url = urllib.unwrap(urllib.toBytes(url))
     if self.tempcache and url in self.tempcache:
         # Cache hit: reuse the result of a previous retrieval.
         return self.tempcache[url]
     type, url1 = urllib.splittype(url)
     if filename is None and (not type or type == 'file'):
         # Schemeless or file: URL and no explicit target -- answer from
         # the local filesystem without downloading.
         try:
             fp = self.open_local_file(url1)
             hdrs = fp.info()
             del fp
             return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
         except IOError, msg:
             # Not a readable local file; fall through.
             # NOTE(review): the remote-download path is not in this excerpt.
             pass
Beispiel #13
0
 def retrieve(self, url, filename=None, reporthook=None, data=None):
     """retrieve(url) returns (filename, headers) for a local object
     or (tempfilename, headers) for a remote object."""
     # Strip an optional '<URL:...>' wrapper and coerce unicode to bytes.
     url = urllib.unwrap(urllib.toBytes(url))
     if self.tempcache and url in self.tempcache:
         # Cache hit: reuse the result of a previous retrieval.
         return self.tempcache[url]
     type, url1 = urllib.splittype(url)
     if filename is None and (not type or type == 'file'):
         # Schemeless or file: URL and no explicit target -- answer from
         # the local filesystem without downloading.
         try:
             fp = self.open_local_file(url1)
             hdrs = fp.info()
             del fp
             return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
         except IOError, msg:
             # Not a readable local file; fall through.
             # NOTE(review): the remote-download path is not in this excerpt.
             pass
 def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False):
     """Initialise a request for *url* with cookie-handling metadata.

     origin_req_host defaults to the host computed by
     cookielib.request_host(self).
     """
     # unwrap('<URL:type://host/path>') --> 'type://host/path'
     self.__original = unwrap(url)
     # Parsed-URL fields, populated later.
     self.type = None
     self.host = None
     self.port = None
     self.data = data
     self.headers = {}
     # Install every initial header through add_header().
     for hdr, val in headers.items():
         self.add_header(hdr, val)
     # Separate header store, kept apart from self.headers.
     self.unredirected_hdrs = {}
     self.origin_req_host = (cookielib.request_host(self)
                             if origin_req_host is None else origin_req_host)
     self.unverifiable = unverifiable
Beispiel #15
0
    def __init__(self, url, data = None, headers = {}, origin_req_host = None, unverifiable = False):
        """Initialise the request, splitting any #fragment off the URL."""
        self.__original = unwrap(url)
        # Separate the #fragment from the URL proper.
        self.__original, fragment = splittag(self.__original)
        # Parsed-URL fields, populated later.
        self.type = None
        self.host = None
        self.port = None
        self._tunnel_host = None
        self.data = data
        self.headers = {}
        # Install every initial header through add_header().
        for hdr, val in headers.items():
            self.add_header(hdr, val)

        self.unredirected_hdrs = {}
        self.origin_req_host = (request_host(self)
                                if origin_req_host is None else origin_req_host)
        self.unverifiable = unverifiable
Beispiel #16
0
 def __init__(self, url, data=None, headers={},
              origin_req_host=None, unverifiable=False):
     """Set up the request: strip the '<URL:...>' wrapper, reset the
     parsed-URL fields, and register the initial headers."""
     self.__original = unwrap(url)
     # Filled in later once the URL is actually parsed.
     self.type = None
     self.host = None
     self.port = None
     self.data = data
     self.headers = {}
     for hdr, val in headers.items():
         self.add_header(hdr, val)
     # Separate header store, kept apart from self.headers.
     self.unredirected_hdrs = {}
     self.origin_req_host = (request_host(self)
                             if origin_req_host is None else origin_req_host)
     self.unverifiable = unverifiable
    def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False):
        """Initialise the request; the URL's #fragment is split off and
        remembered in self.__fragment."""
        self.__original = unwrap(url)
        self.__original, self.__fragment = splittag(self.__original)
        # Parsed-URL fields, populated later.
        self.type = None
        self.host = None
        self.port = None
        self._tunnel_host = None
        self.data = data
        self.headers = {}
        # Register initial headers through add_header().
        for hdr, val in headers.items():
            self.add_header(hdr, val)

        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
Beispiel #18
0
 def retrieve(self, url, filename=None, reporthook=None):
     """retrieve(url) returns (filename, None) for a local object
     or (tempfilename, headers) for a remote object."""
     url = unwrap(url)
     import urlparse
     scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
     # For local files, strip any query part and rebuild a bare URL.
     if not scheme or scheme == 'file':
         qpos = path.find('?')
         if qpos > 0:
             path = path[:qpos]
         url = urlparse.urlunparse((scheme, netloc, path, '', '', ''))
     # Finish any prefetch in flight for this URL before serving it.
     if url in self.__prefetchcache:
         self.__fin_retrieve(url)
     # Serve the prefetched temp file when one exists.
     if url in self.__prefetchtempfiles:
         return self.__prefetchtempfiles[url]
     # Otherwise defer to the stock FancyURLopener implementation.
     return _OriginalFancyURLopener.retrieve(self, url, filename,
                                             reporthook)
Beispiel #19
0
    def __init__(self, url, data = None, headers = {},
                 origin_req_host = None, unverifiable = False,
                 method = None, follow_redirects = True,
                 on_redirect = None, accumulate_body = True,
                 adjust_headers = True):
        """Extended urllib2.Request with events, redirect control, and
        HTTPHeaders-wrapped header stores.

        on_redirect is stored as self.redirect_cb; accumulate_body
        controls whether the response body is collected.
        """

        self.adjust_headers = adjust_headers
        # Keep the unwrapped URL before the base class processes it.
        self._original = urllib.unwrap(url)

        # Base-class initialisation order matters: Request.__init__ fills
        # self.headers / self.unredirected_hdrs, which are wrapped below.
        util.Events.EventMixin.__init__(self)
        urllib2.Request.__init__(self, url, data, headers, origin_req_host, unverifiable)
        self.follow_redirects = follow_redirects
        self._method = method
        self.headers = HTTPHeaders(self.headers)
        self.unredirected_hdrs = HTTPHeaders(self.unredirected_hdrs)

        self.redirect_cb = on_redirect
        self.callback = None
        self.accumulate_body = accumulate_body

        # Set once a redirect has actually been followed.
        self.redirected = False
Beispiel #20
0
def im_handler(message):
    """Parse a Slack-style interactive-message callback from *message*.

    Expects message['body-json'] to be a form-encoded body whose
    'payload' field holds the JSON callback document.  Python 2 code
    (print statement, ``except KeyError, e``).
    """
    print 'IM handler'

    try:
        body_dict = message['body-json']

        # unwrap strips an optional '<URL:...>' wrapper from the raw body.
        payload = str(urllib.unwrap(body_dict))
        # payload = str(urllib.unquote(payload).decode('utf8'))
        payload = urlparse.parse_qs(payload)

        payload_dict = payload['payload'][0]

        json_dict = json.loads(payload_dict)

        # Selected menu action plus the identifiers needed to respond.
        actions = json_dict['actions']

        menu_name = str(actions[0]['name'])

        channel = json_dict['channel']['id']
        team_id = json_dict['team']['id']
        user_id = json_dict['user']['id']

    except KeyError, e:
        # Any missing field means the callback is malformed.
        raise Exception('Bad Request: %s' % e)
Beispiel #21
0
 def test_unwrap(self):
     """unwrap() must strip the '<URL:...>' wrapper from a URL."""
     wrapped = '<URL:type://host/path>'
     self.assertEqual(urllib.unwrap(wrapped), 'type://host/path')
  def retrieve(self, url, filename=None, reporthook=None, data=None):
    """ Retrieves data from the given url and returns a tuple of filename and headers

    Args:
      url (str): url of the data to be retrieved
      filename (str, optional): filename from the url to download
      reporthook: (function, optional): function that should be called for e.g. keeping an UI updated with current state
      data (, optional):

    Returns:
      result: (filename, headers)

    Note:
      Setting self._canceled (e.g. from another thread) stops the download
      loop; a canceled download's partial file is removed before returning.

    See Also:
        urllib.URLopener
    """
    self._canceled=False
    # Strip an optional '<URL:...>' wrapper and coerce unicode to bytes.
    url = urllib.unwrap(urllib.toBytes(url))
    if self.tempcache and url in self.tempcache:
      # Cache hit: reuse the result of a previous retrieval.
      return self.tempcache[url]
    type, url1 = urllib.splittype(url)
    if filename is None and (not type or type == 'file'):
      # Local file and no explicit target: answer without downloading.
      try:
        fp = self.open_local_file(url1)
        hdrs = fp.info()
        fp.close()
        return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
      except IOError:
        pass
    fp = self.open(url, data)
    try:
      headers = fp.info()
      if filename:
        tfp = open(filename, 'wb')
      else:
        # No filename given: derive a suffix from the URL path and
        # download into a tracked temporary file.
        import tempfile
        garbage, path = urllib.splittype(url)
        garbage, path = urllib.splithost(path or "")
        path, garbage = urllib.splitquery(path or "")
        path, garbage = urllib.splitattr(path or "")
        suffix = os.path.splitext(path)[1]
        (fd, filename) = tempfile.mkstemp(suffix)
        self.__tempfiles.append(filename)
        tfp = os.fdopen(fd, 'wb')
      try:
        result = filename, headers
        if self.tempcache is not None:
          self.tempcache[url] = result
        bs = 1024 * 8
        size = -1
        read = 0
        blocknum = 0
        if "content-length" in headers:
          size = int(headers["Content-Length"])
        if reporthook:
          reporthook(blocknum, bs, size)
        # Copy blocks until EOF or until another thread cancels us.
        while not self._canceled:
          block = fp.read(bs)
          if block == "":
            break
          read += len(block)
          tfp.write(block)
          blocknum += 1
          if reporthook:
            reporthook(blocknum, bs, size)
      finally:
        tfp.close()
    finally:
      fp.close()

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
      raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                 "of %i bytes" % (read, size), result)

    # Cancellation: discard the partial file before returning.
    if self._canceled and os.path.exists(filename):
      os.remove(filename)
    return result
Beispiel #23
0
 def test_unwrap(self):
     """The angle-bracket '<URL:...>' wrapper must be removed by unwrap()."""
     result = urllib.unwrap("<URL:type://host/path>")
     self.assertEqual(result, "type://host/path")
Beispiel #24
0
"""An extensible library for opening URLs using a variety of protocols
Beispiel #25
0
"""An extensible library for opening URLs using a variety of protocols
Beispiel #26
0
    def retrieve_resume(self, url, filename, reporthook=None, data=None):
        """Download files from an URL; return (headers, real_url)

        Resumes a download if the local filename exists and
        the server supports download resuming (RFC2616 206 + Content-Range).

        Raises urllib.ContentTooShortError when fewer bytes arrive than
        the Content-Length header announced.
        """

        current_size = 0
        tfp = None
        if os.path.exists(filename):
            try:
                current_size = os.path.getsize(filename)
                tfp = open(filename, 'ab')
                #If the file exists, then only download the remainder
                if current_size > 0:
                    self.addheader('Range', 'bytes=%s-' % (current_size))
            except (IOError, OSError):
                # Narrowed from a bare 'except:' (which also swallowed
                # KeyboardInterrupt/SystemExit).  Resume stays best-effort:
                # log and fall back to a full download.
                logger.warn('Cannot resume download: %s', filename, exc_info=True)
                tfp = None
                current_size = 0

        if tfp is None:
            tfp = open(filename, 'wb')

        # Fix a problem with bad URLs that are not encoded correctly (bug 549)
        url = url.decode('ascii', 'ignore')
        url = url.translate(self.ESCAPE_CHARS)
        url = url.encode('ascii')

        # Strip an optional '<URL:...>' wrapper and coerce unicode to bytes.
        url = urllib.unwrap(urllib.toBytes(url))
        fp = self.open(url, data)
        headers = fp.info()

        if current_size > 0:
            # We told the server to resume - see if she agrees
            # See RFC2616 (206 Partial Content + Section 14.16)
            # XXX check status code here, too...
            range = ContentRange.parse(headers.get('content-range', ''))
            if range is None or range.start != current_size:
                # Ok, that did not work. Reset the download
                # TODO: seek and truncate if content-range differs from request
                tfp.close()
                tfp = open(filename, 'wb')
                current_size = 0
                logger.warn('Cannot resume: Invalid Content-Range (RFC2616).')

        result = headers, fp.geturl()
        bs = 1024*8
        size = -1
        # Progress accounting continues from the already-downloaded prefix.
        read = current_size
        blocknum = int(current_size/bs)
        if reporthook:
            # NOTE(review): size is only learned when a reporthook is given;
            # without one, the loop reads to EOF and the short-read check
            # below can never fire.
            if "content-length" in headers:
                size = int(headers.getrawheader("Content-Length"))  + current_size
            reporthook(blocknum, bs, size)
        # Copy until we have the announced size (or EOF when unknown).
        while read < size or size == -1:
            if size == -1:
                block = fp.read(bs)
            else:
                block = fp.read(min(size-read, bs))
            if block == "":
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result
Beispiel #27
0
    def retrieve_resume(self, url, filename, reporthook=None, data=None):
        """Download files from an URL; return (headers, real_url)

        Resumes a download if the local filename exists and
        the server supports download resuming (RFC2616 206 + Content-Range).

        Raises urllib.ContentTooShortError when fewer bytes arrive than
        the Content-Length header announced.
        """

        current_size = 0
        tfp = None
        if os.path.exists(filename):
            try:
                current_size = os.path.getsize(filename)
                tfp = open(filename, 'ab')
                #If the file exists, then only download the remainder
                if current_size > 0:
                    self.addheader('Range', 'bytes=%s-' % (current_size))
            except (IOError, OSError):
                # Narrowed from a bare 'except:' (which also swallowed
                # KeyboardInterrupt/SystemExit).  Resume stays best-effort:
                # log and fall back to a full download.
                logger.warn('Cannot resume download: %s',
                            filename,
                            exc_info=True)
                tfp = None
                current_size = 0

        if tfp is None:
            tfp = open(filename, 'wb')

        # Fix a problem with bad URLs that are not encoded correctly (bug 549)
        url = url.decode('ascii', 'ignore')
        url = url.translate(self.ESCAPE_CHARS)
        url = url.encode('ascii')

        # Strip an optional '<URL:...>' wrapper and coerce unicode to bytes.
        url = urllib.unwrap(urllib.toBytes(url))
        fp = self.open(url, data)
        headers = fp.info()

        if current_size > 0:
            # We told the server to resume - see if she agrees
            # See RFC2616 (206 Partial Content + Section 14.16)
            # XXX check status code here, too...
            range = ContentRange.parse(headers.get('content-range', ''))
            if range is None or range.start != current_size:
                # Ok, that did not work. Reset the download
                # TODO: seek and truncate if content-range differs from request
                tfp.close()
                tfp = open(filename, 'wb')
                current_size = 0
                logger.warn('Cannot resume: Invalid Content-Range (RFC2616).')

        result = headers, fp.geturl()
        bs = 1024 * 8
        size = -1
        # Progress accounting continues from the already-downloaded prefix.
        read = current_size
        blocknum = int(current_size / bs)
        if reporthook:
            # NOTE(review): size is only learned when a reporthook is given;
            # without one, the loop reads to EOF and the short-read check
            # below can never fire.
            if "content-length" in headers:
                size = int(
                    headers.getrawheader("Content-Length")) + current_size
            reporthook(blocknum, bs, size)
        # Copy until we have the announced size (or EOF when unknown).
        while read < size or size == -1:
            if size == -1:
                block = fp.read(bs)
            else:
                block = fp.read(min(size - read, bs))
            if block == "":
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError(
                "retrieval incomplete: got only %i out "
                "of %i bytes" % (read, size), result)

        return result
Beispiel #28
0
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        # overridden method from urllib.URLopener
        # Returns (filename, headers); setting self._cancelDownload (e.g.
        # from another thread) stops the loop, removes the partial file and
        # sets self.wasCanceled.
        self._cancelDownload = False
        # Strip an optional '<URL:...>' wrapper and coerce unicode to bytes.
        url = urllib.unwrap(urllib.toBytes(url))
        if self.tempcache and url in self.tempcache:
            # Cache hit: reuse the result of a previous retrieval.
            return self.tempcache[url]
        type, url1 = urllib.splittype(url)
        if filename is None and (not type or type == 'file'):
            # Local file and no explicit target: answer without downloading.
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return urllib.url2pathname(urllib.splithost(url1)[1]), hdrs
            except IOError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No filename given: derive a suffix from the URL path and
                # download into a tracked temporary file.
                import tempfile
                garbage, path = urllib.splittype(url)
                garbage, path = urllib.splithost(path or "")
                path, garbage = urllib.splitquery(path or "")
                path, garbage = urllib.splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024 * 8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                # Copy blocks until EOF or until the download is canceled.
                while not self._cancelDownload:
                    block = fp.read(bs)
                    if block == "":
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError(
                "retrieval incomplete: got only %i out "
                "of %i bytes" % (read, size), result)

        # Cancellation: discard the partial file and flag the cancel.
        if self._cancelDownload and os.path.exists(filename):
            os.remove(filename)
            self.wasCanceled = True
        return result
Beispiel #29
0
 def test_unwrap(self):
     """unwrap('<URL:x>') yields the bare URL x."""
     self.assertEqual(urllib.unwrap('<URL:type://host/path>'),
                      'type://host/path')
Beispiel #30
0
 def __init__(self, response):
     """Remember the rel="next" pagination URL from *response*'s Link header,
     or None when the header carries no such relation."""
     link, params = parse_header(response.headers.get('link', ''))
     rel = params.get('rel', '').strip('"\'')
     # Only a rel="next" link is of interest here.
     self._next = unwrap(link) if rel == 'next' else None