Exemple #1
0
def getFileFromURL(url, filename=None, proxyfilename=None):
    """
    Read the content of a URL and copy it into a file.

    url: the link you would like to retrieve
    filename: the local filename where the url is saved to. Defaults to the filename in the url
    proxyfilename: the x509 proxy certificate to be used in case auth is required
                   (handed to the opener as both key_file and cert_file)

    Return the filename used to save the file or raises ClientException in case of errors
    (a status attribute is added if the error is an http one).
    """
    parsedurl = urlparse(url)
    if filename is None:
        filename = os.path.basename(parsedurl.path)
    try:
        opener = urllib.URLopener(key_file=proxyfilename,
                                  cert_file=proxyfilename)
        response = opener.open(url)
        try:
            status = response.getcode()
            # Grab the headers now: they are needed for the error message below,
            # after the response has been closed.
            headers = response.info()
            # Read the file by chunks instead of all at once, writing each chunk as it arrives.
            # This lowers the memory overhead, which can be a problem with big files.
            # 'wb' truncates any previous content and writes the payload untranslated.
            with open(filename, 'wb') as f:
                while True:
                    piece = response.read(1024)
                    if not piece:
                        break
                    f.write(piece)
        finally:
            # Always release the connection, even if reading or writing failed.
            response.close()
    except IOError as ioex:
        msg = "Error while trying to retrieve file from %s: %s" % (url, ioex)
        msg += "\nMake sure the URL is correct."
        exc = ClientException(msg)
        # HTTP failures are expected as an IOError whose first two args are
        # ('http error', <status code>). Go through .args and check its length
        # so that an IOError with fewer args does not blow up in this handler.
        errargs = ioex.args
        if len(errargs) > 1 and errargs[0] == 'http error':
            exc.status = errargs[1]
        raise exc
    except Exception as ex:
        tblogger = logging.getLogger('CRAB3')
        tblogger.exception(ex)
        msg = "Unexpected error while trying to retrieve file from %s: %s" % (
            url, ex)
        raise ClientException(msg)
    if status != 200 and parsedurl.scheme in ['http', 'https']:
        exc = ClientException(
            "Unable to retieve the file from %s. HTTP status code %s. HTTP content: %s"
            % (url, status, headers))
        exc.status = status
        raise exc
    return filename
Exemple #2
0
def getFileFromURL(url, filename = None, proxyfilename = None):
    """
    Read the content of a URL and copy it into a file.

    url: the link you would like to retrieve
    filename: the local filename where the url is saved to. Defaults to the filename in the url
    proxyfilename: the x509 proxy certificate to be used in case auth is required
                   (handed to the opener as both key_file and cert_file)

    Return the filename used to save the file or raises ClientException in case of errors
    (a status attribute is added if the error is an http one).
    """
    parsedurl = urlparse(url)
    if filename is None:
        filename = os.path.basename(parsedurl.path)
    try:
        opener = urllib.URLopener(key_file=proxyfilename, cert_file=proxyfilename)
        response = opener.open(url)
        try:
            status = response.getcode()
            # Keep the headers around: the status check below runs after the
            # response has been closed and still needs them for its message.
            headers = response.info()
            # Read the file by chunks instead of all at once, writing each chunk as it arrives.
            # This lowers the memory overhead, which can be a problem with big files.
            # Binary write mode truncates the target and stores the bytes unmodified.
            with open(filename, 'wb') as f:
                while True:
                    piece = response.read(1024)
                    if not piece:
                        break
                    f.write(piece)
        finally:
            # Close the connection whether or not the transfer succeeded.
            response.close()
    except IOError as ioex:
        msg = "Error while trying to retrieve file from %s: %s" % (url, ioex)
        msg += "\nMake sure the URL is correct."
        exc = ClientException(msg)
        # An http failure is recognised by the IOError args starting with
        # ('http error', <status code>). Checking the length first avoids an
        # IndexError for IOErrors that carry fewer than two args.
        errargs = ioex.args
        if len(errargs) > 1 and errargs[0] == 'http error':
            exc.status = errargs[1]
        raise exc
    except Exception as ex:
        tblogger = logging.getLogger('CRAB3')
        tblogger.exception(ex)
        msg = "Unexpected error while trying to retrieve file from %s: %s" % (url, ex)
        raise ClientException(msg)
    if status != 200 and parsedurl.scheme in ['http', 'https']:
        exc = ClientException("Unable to retieve the file from %s. HTTP status code %s. HTTP content: %s" % (url, status, headers))
        exc.status = status
        raise exc
    return filename
Exemple #3
0
def getFileFromURL(url, filename=None, proxyfilename=None):
    """
    Read the content of a URL and copy it into a file.

    The whole response body is read into memory first; the local file is only
    written once the HTTP status has been checked.

    url: the link you would like to retrieve
    filename: the local filename where the url is saved to. Defaults to the filename in the url
    proxyfilename: the x509 proxy certificate to be used in case auth is required
                   (handed to the opener as both key_file and cert_file)

    Return the filename used to save the file or raises ClientException in case of
    retrieval errors (a status attribute is added if the error is an http one).
    Errors raised while writing the local file are not converted.
    """
    parsedurl = urlparse(url)
    if filename is None:
        filename = os.path.basename(parsedurl.path)
    try:
        opener = urllib.URLopener(key_file=proxyfilename,
                                  cert_file=proxyfilename)
        response = opener.open(url)
        try:
            status = response.getcode()
            # Captured before closing so the status check below can report them.
            headers = response.info()
            filestr = response.read()
        finally:
            # Release the connection as soon as the body has been read (or failed to).
            response.close()
    except IOError as ioex:
        msg = "Error while trying to retrieve file from %s: %s" % (url, ioex)
        msg += "\nMake sure the URL is correct."
        exc = ClientException(msg)
        # HTTP failures are expected as an IOError whose first two args are
        # ('http error', <status code>). Use .args with a length check so an
        # IOError with fewer args cannot raise IndexError from this handler.
        errargs = ioex.args
        if len(errargs) > 1 and errargs[0] == 'http error':
            exc.status = errargs[1]
        raise exc
    except Exception as ex:
        tblogger = logging.getLogger('CRAB3')
        tblogger.exception(ex)
        msg = "Unexpected error while trying to retrieve file from %s: %s" % (
            url, ex)
        raise ClientException(msg)
    if status != 200 and parsedurl.scheme in ['http', 'https']:
        exc = ClientException(
            "Unable to retieve the file from %s. HTTP status code %s. HTTP content: %s"
            % (url, status, headers))
        exc.status = status
        raise exc
    # Binary mode: the payload is stored exactly as received.
    with open(filename, 'wb') as f:
        f.write(filestr)
    return filename
def getFileFromURL(url, filename = None, proxyfilename = None):
    """
    Read the content of a URL and copy it into a file.

    The response body is fully read into memory, the HTTP status is checked,
    and only then is the local file written.

    url: the link you would like to retrieve
    filename: the local filename where the url is saved to. Defaults to the filename in the url
    proxyfilename: the x509 proxy certificate to be used in case auth is required
                   (handed to the opener as both key_file and cert_file)

    Return the filename used to save the file or raises ClientException in case of
    retrieval errors (a status attribute is added if the error is an http one).
    Errors raised while writing the local file are not converted.
    """
    parsedurl = urlparse(url)
    if filename is None:
        filename = os.path.basename(parsedurl.path)
    try:
        opener = urllib.URLopener(key_file=proxyfilename, cert_file=proxyfilename)
        response = opener.open(url)
        try:
            status = response.getcode()
            # Saved up front: the response is closed before the status check runs.
            headers = response.info()
            filestr = response.read()
        finally:
            # The connection is closed on both the success and the failure path.
            response.close()
    except IOError as ioex:
        msg = "Error while trying to retrieve file from %s: %s" % (url, ioex)
        msg += "\nMake sure the URL is correct."
        exc = ClientException(msg)
        # An http failure is recognised by the IOError args starting with
        # ('http error', <status code>). The length check keeps IOErrors with
        # fewer than two args from raising IndexError here.
        errargs = ioex.args
        if len(errargs) > 1 and errargs[0] == 'http error':
            exc.status = errargs[1]
        raise exc
    except Exception as ex:
        tblogger = logging.getLogger('CRAB3')
        tblogger.exception(ex)
        msg = "Unexpected error while trying to retrieve file from %s: %s" % (url, ex)
        raise ClientException(msg)
    if status != 200 and parsedurl.scheme in ['http', 'https']:
        exc = ClientException("Unable to retieve the file from %s. HTTP status code %s. HTTP content: %s" % (url, status, headers))
        exc.status = status
        raise exc
    # Write in binary mode so the downloaded bytes are not altered on disk.
    with open(filename, 'wb') as f:
        f.write(filestr)
    return filename