コード例 #1
0
def makeRequest(url, params):
    """
    Send a request to ``url`` with ``params`` and return the response body.

    A new ``RequestHandler`` is created for the call. ``ckey`` and ``cert``
    are not defined in this function; they are read from the enclosing
    (module) scope.

    A status other than 200 is only reported by printing "ERROR"; the body
    is returned in either case.
    """
    handler = RequestHandler()
    header, data = handler.request(url, params, ckey=ckey, cert=cert)
    if header.status != 200:
        # Parenthesised form: prints the same text on Python 2 and is also
        # valid syntax on Python 3 (the bare print statement is not).
        print("ERROR")
    return data
コード例 #2
0
ファイル: UserUtilities.py プロジェクト: emaszs/CRABClient
def getDataFromURL(url, proxyfilename = None):
    """
    Fetch the content behind a URL and return it as a string.

    The kind of content is irrelevant here (a json document, a tarball, ...);
    the caller decides how to interpret the returned data.

    url: the link to retrieve
    proxyfilename: x509 proxy certificate, passed as both key and
                   certificate in case authentication is required

    Returns the body produced by ``RequestHandler.request``.
    """
    # pycurl may have problems with unicode, so hand it an ASCII byte string
    asciiUrl = url.encode('ascii')

    handler = RequestHandler()
    requestArgs = {
        'url': asciiUrl,
        'params': {},
        'ckey': proxyfilename,
        'cert': proxyfilename,
        'capath': HTTPRequests.getCACertPath(),
    }
    _, body = handler.request(**requestArgs)
    return body
コード例 #3
0
ファイル: UserUtilities.py プロジェクト: belforte/CRABClient
def getDataFromURL(url, proxyfilename = None):
    """
    Download whatever ``url`` points to and return it as a string.

    No assumption is made about the content type (json, tarball, ...); the
    data is returned as received so the caller can process it as needed.

    url: the link to retrieve
    proxyfilename: x509 proxy certificate, used as both key and certificate
                   in case authentication is required

    Returns the body produced by ``RequestHandler.request``.
    """
    # Unicode URLs may cause problems in pycurl: convert to ASCII first
    plainUrl = url.encode('ascii')

    requester = RequestHandler()
    caLocation = HTTPRequests.getCACertPath()
    _, payload = requester.request(url=plainUrl,
                                   params={},
                                   ckey=proxyfilename,
                                   cert=proxyfilename,
                                   capath=caLocation)
    return payload
コード例 #4
0
class PyCurlManager(unittest.TestCase):
    """Test pycurl_manager module"""

    def setUp(self):
        """Create a RequestHandler and locate the user key/cert under $HOME/.globus."""
        self.mgr = RequestHandler()
        self.ckey = os.path.join(os.environ['HOME'], '.globus/userkey.pem')
        self.cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem')

    def testMulti(self):
        """
        Test fetch of several urls at once, one of the url relies on CERN SSO.
        """
        tfile = tempfile.NamedTemporaryFile()
        url1 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/help"
        url2 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/datatiers"
        url3 = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        cern_sso_cookie(url3, tfile.name, self.cert, self.ckey)
        cookie = {url3: tfile.name}
        urls = [url1, url2, url3]
        data = getdata(urls, self.ckey, self.cert, cookie=cookie)
        # Count the responses whose headers report success
        headers = sum(1 for row in data if '200 OK' in row['headers'])
        # assertTrue(x, 3) would treat 3 as the failure message and pass for
        # any non-zero count; every one of the urls must have succeeded.
        self.assertEqual(headers, len(urls))

    def testSingle(self):
        """
        Test single call to CERN SSO url.
        """
        # test RequestHandler
        url = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        params = {}
        tfile = tempfile.NamedTemporaryFile()
        cern_sso_cookie(url, tfile.name, self.cert, self.ckey)
        cookie = {url: tfile.name}
        header, _ = self.mgr.request(url, params, cookie=cookie)
        # Compare the status instead of merely checking that it is truthy
        self.assertEqual(header.status, 200)
コード例 #5
0
class Requests(dict):
    """
    Generic class for sending different types of HTTP Request to a given URL
    """
    def __init__(self, url='http://localhost', idict=None):
        """
        Populate the request configuration from ``url`` and ``idict``.

        url should really be host - TODO fix that when have sufficient code
        coverage and change _getURLOpener if needed

        Values given in ``idict`` take precedence over the defaults set here.
        Credentials embedded in ``url`` are turned into a basic auth header
        and removed from the stored host.
        """
        idict = idict or {}
        dict.__init__(self, idict)
        self.pycurl = idict.get('pycurl')
        self.capath = idict.get('capath')
        if self.pycurl:
            self.reqmgr = RequestHandler()

        # defaults that the incoming dict may already have overridden
        self.setdefault("accept_type", 'text/html')
        self.setdefault("content_type", 'application/x-www-form-urlencoded')
        self.additionalHeaders = {}

        # basic auth has to be handled first because it changes the url
        parts = sanitizeURL(url)
        if parts['username'] is not None:
            self.addBasicAuth(parts['username'], parts['password'])
            url = parts['url']  # url without user and password

        self.setdefault("host", url)

        # the incoming dict wins over everything set so far
        self.update(idict)

        self['endpoint_components'] = urlparse.urlparse(self['host'])

        # An explicit cachepath of None switches caching off
        cachingDisabled = 'cachepath' in idict and idict['cachepath'] is None
        if cachingDisabled:
            self["req_cache_path"] = None
        else:
            cacheDir = self.cachePath(idict.get('cachepath'),
                                      idict.get('service_name'))
            self["cachepath"] = cacheDir
            self["req_cache_path"] = os.path.join(cacheDir, '.cache')

        lateDefaults = (("cert", None), ("key", None), ('capath', None),
                        ("timeout", 300), ("logger", logging))
        for option, fallback in lateDefaults:
            self.setdefault(option, fallback)

        check_server_url(self['host'])

    def get(self,
            uri=None,
            data={},
            incoming_headers={},
            encode=True,
            decode=True,
            contentType=None):
        """
        GET some data
        """
        return self.makeRequest(uri, data, 'GET', incoming_headers, encode,
                                decode, contentType)

    def post(self,
             uri=None,
             data={},
             incoming_headers={},
             encode=True,
             decode=True,
             contentType=None):
        """
        POST some data
        """
        return self.makeRequest(uri, data, 'POST', incoming_headers, encode,
                                decode, contentType)

    def put(self,
            uri=None,
            data={},
            incoming_headers={},
            encode=True,
            decode=True,
            contentType=None):
        """
        PUT some data
        """
        return self.makeRequest(uri, data, 'PUT', incoming_headers, encode,
                                decode, contentType)

    def delete(self,
               uri=None,
               data={},
               incoming_headers={},
               encode=True,
               decode=True,
               contentType=None):
        """
        DELETE some data
        """
        return self.makeRequest(uri, data, 'DELETE', incoming_headers, encode,
                                decode, contentType)

    def makeRequest(self,
                    uri=None,
                    data={},
                    verb='GET',
                    incoming_headers={},
                    encoder=True,
                    decoder=True,
                    contentType=None):
        """
        Wrapper around request helper functions.
        """
        if self.pycurl:
            result = self.makeRequest_pycurl(uri, data, verb, incoming_headers,
                                             encoder, decoder, contentType)
        else:
            result = self.makeRequest_httplib(uri, data, verb,
                                              incoming_headers, encoder,
                                              decoder, contentType)
        return result

    def makeRequest_pycurl(self,
                           uri=None,
                           params={},
                           verb='GET',
                           incoming_headers={},
                           encoder=True,
                           decoder=True,
                           contentType=None):
        """
        Make HTTP(s) request via pycurl library. Stay complaint with
        makeRequest_httplib method.
        """
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        if not contentType:
            contentType = self['content_type']
        headers = {
            "Content-type": contentType,
            "User-agent": "WMCore.Services.Requests/v001",
            "Accept": self['accept_type']
        }
        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]
        # And now overwrite any headers that have been passed into the call:
        headers.update(incoming_headers)
        url = self['host'] + uri
        response, data = self.reqmgr.request(url,
                                             params,
                                             headers,
                                             verb=verb,
                                             ckey=ckey,
                                             cert=cert,
                                             capath=capath,
                                             decode=decoder)
        return data, response.status, response.reason, response.fromcache

    def makeRequest_httplib(self,
                            uri=None,
                            data={},
                            verb='GET',
                            incoming_headers={},
                            encoder=True,
                            decoder=True,
                            contentType=None):
        """
        Make a request to the remote database. for a give URI. The type of
        request will determine the action take by the server (be careful with
        DELETE!). Data should be a dictionary of {dataname: datavalue}.

        Returns a tuple of the data from the server, decoded using the
        appropriate method the response status and the response reason, to be
        used in error handling.

        You can override the method to encode/decode your data by passing in an
        encoding/decoding function to this method. Your encoded data must end up
        as a string.

        """
        # TODO: User agent should be:
        # $client/$client_version (CMS)
        # $http_lib/$http_lib_version $os/$os_version ($arch)
        if not contentType:
            contentType = self['content_type']
        headers = {
            "Content-type": contentType,
            "User-agent": "WMCore.Services.Requests/v001",
            "Accept": self['accept_type']
        }
        encoded_data = ''

        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]

        # And now overwrite any headers that have been passed into the call:
        # WARNING: doesn't work with deplate so only accept gzip
        incoming_headers["accept-encoding"] = "gzip,identity"
        headers.update(incoming_headers)

        # httpib2 requires absolute url
        uri = self['host'] + uri

        # If you're posting an attachment, the data might not be a dict
        #   please test against ConfigCache_t if you're unsure.
        # assert type(data) == type({}), \
        #        "makeRequest input data must be a dict (key/value pairs)"

        if verb != 'GET' and data:
            if isinstance(encoder, (types.MethodType, types.FunctionType)):
                encoded_data = encoder(data)
            elif encoder == False:
                # Don't encode the data more than we have to
                #  we don't want to URL encode the data blindly,
                #  that breaks POSTing attachments... ConfigCache_t
                # encoded_data = urllib.urlencode(data)
                #  -- Andrew Melo 25/7/09
                encoded_data = data
            else:
                # Either the encoder is set to True or it's junk, so use
                # self.encode
                encoded_data = self.encode(data)
            headers["Content-length"] = len(encoded_data)
        elif verb == 'GET' and data:
            # encode the data as a get string
            uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True))

        headers["Content-length"] = str(len(encoded_data))

        # PY3 needed for compatibility because str under futurize is not a string. Can be just str in Py3 only
        # PY3 Don't let futurize change this
        assert isinstance(encoded_data, (str, basestring)), \
            "Data in makeRequest is %s and not encoded to a string" % type(encoded_data)

        # httplib2 will allow sockets to close on remote end without retrying
        # try to send request - if this fails try again - should then succeed
        try:
            conn = self._getURLOpener()
            response, result = conn.request(uri,
                                            method=verb,
                                            body=encoded_data,
                                            headers=headers)
            if response.status == 408:  # timeout can indicate a socket error
                raise socket.error
        except ServerNotFoundError as ex:
            # DNS cannot resolve this domain name, let's call it 'Service Unavailable'
            e = HTTPException()
            setattr(e, 'url', uri)
            setattr(e, 'status', 503)
            setattr(e, 'reason', 'Service Unavailable')
            setattr(e, 'result', str(ex))
            raise e
        except (socket.error, AttributeError):
            self['logger'].warn("Http request failed, retrying once again..")
            # AttributeError implies initial connection error - need to close
            # & retry. httplib2 doesn't clear httplib state before next request
            # if this is threaded this may spoil things
            # only have one endpoint so don't need to determine which to shut
            for con in conn.connections.values():
                con.close()
            conn = self._getURLOpener()
            # ... try again... if this fails propagate error to client
            try:
                response, result = conn.request(uri,
                                                method=verb,
                                                body=encoded_data,
                                                headers=headers)
            except AttributeError:
                msg = traceback.format_exc()
                # socket/httplib really screwed up - nuclear option
                conn.connections = {}
                raise socket.error('Error contacting: %s: %s' %
                                   (self.getDomainName(), msg))
        if response.status >= 400:
            e = HTTPException()
            setattr(e, 'req_data', encoded_data)
            setattr(e, 'req_headers', headers)
            setattr(e, 'url', uri)
            setattr(e, 'result', result)
            setattr(e, 'status', response.status)
            setattr(e, 'reason', response.reason)
            setattr(e, 'headers', response)
            raise e

        if isinstance(decoder, (types.MethodType, types.FunctionType)):
            result = decoder(result)
        elif decoder != False:
            result = self.decode(result)
        # TODO: maybe just return result and response...
        return result, response.status, response.reason, response.fromcache

    def encode(self, data):
        """
        encode data into some appropriate format, for now make it a string...
        """
        return urllib.urlencode(data, doseq=1)

    def decode(self, data):
        """
        decode data to some appropriate format, for now make it a string...
        """
        return data.__str__()

    def cachePath(self, given_path, service_name):
        """Return cache location"""
        if not service_name:
            service_name = 'REQUESTS'
        top = self.cacheTopPath(given_path, service_name)

        # deal with multiple Services that have the same service running and
        # with multiple users for a given Service
        if self.getUserName() is None:
            cachepath = os.path.join(top, self['endpoint_components'].netloc)
        else:
            cachepath = os.path.join(
                top, '%s-%s' % (self.getUserName(), self.getDomainName()))

        try:
            # only we should be able to write to this dir
            os.makedirs(cachepath, stat.S_IRWXU)
        except OSError:
            if not os.path.isdir(cachepath):
                raise
            Permissions.owner_readwriteexec(cachepath)

        return cachepath

    def cacheTopPath(self, given_path, service_name):
        """Where to cache results?

        Logic:
          o If passed in take that
          o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined?
          o Is WMCORE_CACHE_DIR set
          o Generate a temporary directory
          """
        if given_path:
            return given_path
        user = str(os.getuid())
        # append user id so users don't clobber each other
        lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower())
        for var in ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR'):
            if os.environ.get(var):
                firstbit = os.environ[var]
                break
        else:
            idir = tempfile.mkdtemp(prefix='.wmcore_cache_')
            self['deleteCacheOnExit'] = TempDirectory(idir)
            return idir

        return os.path.join(firstbit, lastbit)

    def getDomainName(self):
        """Parse netloc info to get hostname"""
        return self['endpoint_components'].hostname

    def getUserName(self):
        """Parse netloc to get user"""
        return self['endpoint_components'].username

    def _getURLOpener(self):
        """
        Build the httplib2.Http object used to talk to the endpoint.

        For https endpoints the user key/certificate are attached when they
        can be found; when they cannot, that is only logged. Server
        certificate validation is disabled.
        """
        import httplib2
        key = cert = None
        if self['endpoint_components'].scheme == 'https':
            # certs are only added to https requests; failing to find them is
            # not fatal because not all https connections require them
            try:
                key, cert = self.getKeyCert()
            except Exception as ex:
                self['logger'].info(
                    'No certificate or key found, authentication may fail')
                self['logger'].debug(str(ex))

        cacheLocation = self['req_cache_path']
        timeout = self['timeout']
        try:
            # validation is disabled as there is no single PEM with all ca's
            http = httplib2.Http(cacheLocation, timeout,
                                 disable_ssl_certificate_validation=True)
        except TypeError:
            # old httplib2 versions do not know the keyword and disable
            # validation by default
            http = httplib2.Http(cacheLocation, timeout)

        # Domain must be just a hostname and port. self[host] is a URL currently
        if key or cert:
            http.add_certificate(key=key, cert=cert, domain='')
        return http

    def addBasicAuth(self, username, password):
        """Add basic auth headers to request"""
        auth_string = "Basic %s" % base64.encodestring(
            '%s:%s' % (username, password)).strip()
        self.additionalHeaders["Authorization"] = auth_string

    def getKeyCert(self):
        """
       _getKeyCert_

       Get the user credentials if they exist, otherwise throw an exception.
       This code was modified from DBSAPI/dbsHttpService.py
        """

        # Zeroth case is if the class has over ridden the key/cert and has it
        # stored in self
        if self['cert'] and self['key']:
            key = self['key']
            cert = self['cert']
        else:
            key, cert = getKeyCertFromEnv()

        # Set but not found
        if key is None or cert is None:
            raise WMException('Request requires a host certificate and key',
                              "WMCORE-11")

        # All looks OK, still doesn't guarantee proxy's validity etc.
        return key, cert

    def getCAPath(self):
        """
        _getCAPath_

        Return the path of the CA certificates. The check is loose in the pycurl_manager:
        is capath == None then the server identity is not verified. To enable this check
        you need to set either the X509_CERT_DIR variable or the cacert key of the request.
        """
        capath = self['capath']
        if not capath:
            capath = getCAPathFromEnv()
        return capath

    def uploadFile(self,
                   fileName,
                   url,
                   fieldName='file1',
                   params=[],
                   verb='POST'):
        """
        Upload a file with curl streaming it directly from disk

        fileName:  path of the file to send
        url:       destination of the upload
        fieldName: name of the form field carrying the file
        params:    additional form fields, appended after the file field
        verb:      'POST' or 'PUT'

        Returns the response body. Raises HTTPException for an unsupported
        verb or when the response status is outside the 2xx range.
        """
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        import pycurl
        # Reject a bad verb before a curl handle is allocated
        if verb not in ('POST', 'PUT'):
            raise HTTPException("Verb %s not sopported for upload." % verb)

        fullParams = [(fieldName, (pycurl.FORM_FILE, fileName))]
        fullParams.extend(params)
        bbuf = StringIO.StringIO()
        hbuf = StringIO.StringIO()

        c = pycurl.Curl()
        try:
            if verb == 'POST':
                c.setopt(pycurl.POST, 1)
            else:
                c.setopt(pycurl.CUSTOMREQUEST, 'PUT')
            c.setopt(pycurl.URL, url)
            c.setopt(pycurl.HTTPPOST, fullParams)
            c.setopt(pycurl.WRITEFUNCTION, bbuf.write)
            c.setopt(pycurl.HEADERFUNCTION, hbuf.write)
            if capath:
                c.setopt(pycurl.CAPATH, capath)
                c.setopt(pycurl.SSL_VERIFYPEER, True)
            else:
                # no CA path available: the server identity is not verified
                c.setopt(pycurl.SSL_VERIFYPEER, False)
            if ckey:
                c.setopt(pycurl.SSLKEY, ckey)
            if cert:
                c.setopt(pycurl.SSLCERT, cert)
            c.perform()
        finally:
            # release the handle even when setopt/perform raises
            c.close()

        hres = hbuf.getvalue()
        bres = bbuf.getvalue()
        rh = ResponseHeader(hres)
        if rh.status < 200 or rh.status >= 300:
            exc = HTTPException(bres)
            exc.req_data = fullParams
            exc.url = url
            exc.result = bres
            exc.status = rh.status
            exc.reason = rh.reason
            exc.headers = rh.header
            raise exc

        return bres

    def downloadFile(self, fileName, url):
        """
        Download a file with curl streaming it directly to disk

        fileName: path the response body is written to
        url:      location to download from (redirects are followed)

        Returns a tuple (fileName, header) where header is the parsed
        ResponseHeader. Raises RuntimeError when the response status is
        outside the 2xx range.
        """
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        import pycurl

        hbuf = StringIO.StringIO()

        with open(fileName, "wb") as fp:
            curl = pycurl.Curl()
            try:
                curl.setopt(pycurl.URL, url)
                curl.setopt(pycurl.WRITEDATA, fp)
                curl.setopt(pycurl.HEADERFUNCTION, hbuf.write)
                if capath:
                    curl.setopt(pycurl.CAPATH, capath)
                    curl.setopt(pycurl.SSL_VERIFYPEER, True)
                else:
                    # no CA path available: server identity is not verified
                    curl.setopt(pycurl.SSL_VERIFYPEER, False)
                if ckey:
                    curl.setopt(pycurl.SSLKEY, ckey)
                if cert:
                    curl.setopt(pycurl.SSLCERT, cert)
                curl.setopt(pycurl.FOLLOWLOCATION, 1)
                curl.perform()
            finally:
                # release the handle even when setopt/perform raises
                curl.close()

            header = ResponseHeader(hbuf.getvalue())
            if header.status < 200 or header.status >= 300:
                raise RuntimeError('Reading %s failed with code %s' %
                                   (url, header.status))
        return fileName, header
コード例 #6
0
ファイル: Requests.py プロジェクト: PerilousApricot/WMCore
class Requests(dict):
    """
    Generic class for sending different types of HTTP Request to a given URL
    """

    def __init__(self, url='http://localhost', idict=None):
        """
        Populate the request configuration from ``url`` and ``idict``.

        url should really be host - TODO fix that when have sufficient code
        coverage and change _getURLOpener if needed

        Values given in ``idict`` take precedence over the defaults set here.
        Credentials embedded in ``url`` are turned into a basic auth header
        and removed from the stored host.
        """
        idict = idict or {}
        dict.__init__(self, idict)
        self.pycurl = idict.get('pycurl')
        self.capath = idict.get('capath')
        if self.pycurl:
            self.reqmgr = RequestHandler()

        # defaults that the incoming dict may already have overridden
        self.setdefault("accept_type", 'text/html')
        self.setdefault("content_type", 'application/x-www-form-urlencoded')
        self.additionalHeaders = {}

        # basic auth has to be handled first because it changes the url
        parts = sanitizeURL(url)
        if parts['username'] is not None:
            self.addBasicAuth(parts['username'], parts['password'])
            url = parts['url']  # url without user and password

        self.setdefault("host", url)

        # the incoming dict wins over everything set so far
        self.update(idict)

        self['endpoint_components'] = urlparse.urlparse(self['host'])

        # An explicit cachepath of None switches caching off
        cachingDisabled = 'cachepath' in idict and idict['cachepath'] is None
        if cachingDisabled:
            self["req_cache_path"] = None
        else:
            cacheDir = self.cachePath(idict.get('cachepath'),
                                      idict.get('service_name'))
            self["cachepath"] = cacheDir
            self["req_cache_path"] = os.path.join(cacheDir, '.cache')

        for option, fallback in (("timeout", 300), ("logger", logging)):
            self.setdefault(option, fallback)

        check_server_url(self['host'])

    def get(self, uri=None, data={}, incoming_headers={},
            encode=True, decode=True, contentType=None):
        """
        GET some data
        """
        return self.makeRequest(uri, data, 'GET', incoming_headers,
                                encode, decode, contentType)

    def post(self, uri=None, data={}, incoming_headers={},
             encode=True, decode=True, contentType=None):
        """
        POST some data
        """
        return self.makeRequest(uri, data, 'POST', incoming_headers,
                                encode, decode, contentType)

    def put(self, uri=None, data={}, incoming_headers={},
            encode=True, decode=True, contentType=None):
        """
        PUT some data
        """
        return self.makeRequest(uri, data, 'PUT', incoming_headers,
                                encode, decode, contentType)

    def delete(self, uri=None, data={}, incoming_headers={},
               encode=True, decode=True, contentType=None):
        """
        DELETE some data
        """
        return self.makeRequest(uri, data, 'DELETE', incoming_headers,
                                encode, decode, contentType)

    def makeRequest(self, uri=None, data={}, verb='GET', incoming_headers={},
                    encoder=True, decoder=True, contentType=None):
        """
        Wrapper around request helper functions.
        """
        if self.pycurl:
            result = self.makeRequest_pycurl(uri, data, verb, incoming_headers,
                                             encoder, decoder, contentType)
        else:
            result = self.makeRequest_httplib(uri, data, verb, incoming_headers,
                                              encoder, decoder, contentType)
        return result

    def makeRequest_pycurl(self, uri=None, params={}, verb='GET',
                           incoming_headers={}, encoder=True, decoder=True, contentType=None):
        """
        Make HTTP(s) request via pycurl library. Stay complaint with
        makeRequest_httplib method.
        """
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        if not contentType:
            contentType = self['content_type']
        headers = {"Content-type": contentType,
                   "User-agent": "WMCore.Services.Requests/v001",
                   "Accept": self['accept_type']}
        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]
        # And now overwrite any headers that have been passed into the call:
        headers.update(incoming_headers)
        url = self['host'] + uri
        response, data = self.reqmgr.request(url, params, headers, \
                                             verb=verb, ckey=ckey, cert=cert, capath=capath, decode=decoder)
        return data, response.status, response.reason, response.fromcache

    def makeRequest_httplib(self, uri=None, data={}, verb='GET',
                            incoming_headers={}, encoder=True, decoder=True, contentType=None):
        """
        Make a request to the remote database. for a give URI. The type of
        request will determine the action take by the server (be careful with
        DELETE!). Data should be a dictionary of {dataname: datavalue}.

        Returns a tuple of the data from the server, decoded using the
        appropriate method the response status and the response reason, to be
        used in error handling.

        You can override the method to encode/decode your data by passing in an
        encoding/decoding function to this method. Your encoded data must end up
        as a string.

        """
        # TODO: User agent should be:
        # $client/$client_version (CMS)
        # $http_lib/$http_lib_version $os/$os_version ($arch)
        if not contentType:
            contentType = self['content_type']
        headers = {"Content-type": contentType,
                   "User-agent": "WMCore.Services.Requests/v001",
                   "Accept": self['accept_type']}
        encoded_data = ''

        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]

        # And now overwrite any headers that have been passed into the call:
        # WARNING: doesn't work with deplate so only accept gzip
        incoming_headers["accept-encoding"] = "gzip,identity"
        headers.update(incoming_headers)

        # httpib2 requires absolute url
        uri = self['host'] + uri

        # If you're posting an attachment, the data might not be a dict
        #   please test against ConfigCache_t if you're unsure.
        # assert type(data) == type({}), \
        #        "makeRequest input data must be a dict (key/value pairs)"

        if verb != 'GET' and data:
            if isinstance(encoder, (types.MethodType, types.FunctionType)):
                encoded_data = encoder(data)
            elif encoder == False:
                # Don't encode the data more than we have to
                #  we don't want to URL encode the data blindly,
                #  that breaks POSTing attachments... ConfigCache_t
                # encoded_data = urllib.urlencode(data)
                #  -- Andrew Melo 25/7/09
                encoded_data = data
            else:
                # Either the encoder is set to True or it's junk, so use
                # self.encode
                encoded_data = self.encode(data)
            headers["Content-length"] = len(encoded_data)
        elif verb == 'GET' and data:
            # encode the data as a get string
            uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True))

        headers["Content-length"] = str(len(encoded_data))

        assert isinstance(encoded_data, str), \
            "Data in makeRequest is %s and not encoded to a string" % type(encoded_data)

        # httplib2 will allow sockets to close on remote end without retrying
        # try to send request - if this fails try again - should then succeed
        try:
            conn = self._getURLOpener()
            response, result = conn.request(uri, method=verb,
                                                    body=encoded_data, headers=headers)
            if response.status == 408:  # timeout can indicate a socket error
                raise socket.error
        except (socket.error, AttributeError):
            self['logger'].warn("Http request failed, retrying once again..")
            # AttributeError implies initial connection error - need to close
            # & retry. httplib2 doesn't clear httplib state before next request
            # if this is threaded this may spoil things
            # only have one endpoint so don't need to determine which to shut
            for con in conn.connections.values():
                con.close()
            conn = self._getURLOpener()
            # ... try again... if this fails propagate error to client
            try:
                response, result = conn.request(uri, method=verb,
                                                        body=encoded_data, headers=headers)
            except AttributeError:
                msg = traceback.format_exc()
                # socket/httplib really screwed up - nuclear option
                conn.connections = {}
                raise socket.error('Error contacting: %s: %s' % (self.getDomainName(), msg))
        if response.status >= 400:
            e = HTTPException()
            setattr(e, 'req_data', encoded_data)
            setattr(e, 'req_headers', headers)
            setattr(e, 'url', uri)
            setattr(e, 'result', result)
            setattr(e, 'status', response.status)
            setattr(e, 'reason', response.reason)
            setattr(e, 'headers', response)
            raise e

        if isinstance(decoder, (types.MethodType, types.FunctionType)):
            result = decoder(result)
        elif decoder != False:
            result = self.decode(result)
        # TODO: maybe just return result and response...
        return result, response.status, response.reason, response.fromcache

    def encode(self, data):
        """
        Serialise ``data`` into a URL-encoded string.

        The work is delegated to ``urllib.urlencode`` with ``doseq``
        switched on, so a sequence value produces one ``key=value`` pair
        per element instead of the repr of the sequence.
        """
        query_string = urllib.urlencode(data, doseq=True)
        return query_string

    def decode(self, data):
        """
        Turn a response payload into its string form.

        Currently this is just the result of the object's own
        ``__str__`` method; no parsing is attempted.
        """
        as_text = data.__str__()
        return as_text

    def cachePath(self, given_path, service_name):
        """
        Build, create and return the cache directory for this endpoint.

        The directory lives under ``cacheTopPath(given_path, service_name)``
        (``service_name`` falls back to ``'REQUESTS'`` when empty). Its leaf
        is the endpoint netloc when the URL carries no user name, otherwise
        ``<user>-<hostname>``, so that several services and several users
        sharing one top directory do not collide.

        Raises OSError when the directory cannot be created and does not
        already exist.
        """
        top = self.cacheTopPath(given_path, service_name or 'REQUESTS')

        # One leaf per (user, host) so services/users sharing ``top`` stay apart
        user_name = self.getUserName()
        if user_name is None:
            leaf = self['endpoint_components'].netloc
        else:
            leaf = '%s-%s' % (user_name, self.getDomainName())
        cachepath = os.path.join(top, leaf)

        try:
            # owner-only permissions: nobody else should write here
            os.makedirs(cachepath, stat.S_IRWXU)
        except OSError:
            if os.path.isdir(cachepath):
                # already there: just re-apply the owner permissions
                Permissions.owner_readwriteexec(cachepath)
            else:
                raise

        return cachepath

    def cacheTopPath(self, given_path, service_name):
        """
        Decide which top-level directory results are cached under.

        Resolution order:
          1. ``given_path`` when it is non-empty, returned unchanged;
          2. the ``<SERVICE_NAME>_CACHE_DIR`` environment variable;
          3. the ``WMCORE_CACHE_DIR`` environment variable;
          4. a freshly created temporary directory, which is also wrapped
             in a ``TempDirectory`` stored under ``self['deleteCacheOnExit']``.

        For cases 2 and 3 the result is
        ``<env value>/.wmcore_cache_<uid>/<service_name lower-cased>``.
        """
        if given_path:
            return given_path

        # the uid keeps different users from clobbering each other's cache
        uid = str(os.getuid())
        suffix = os.path.join('.wmcore_cache_%s' % uid, service_name.lower())

        candidates = ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR')
        for env_name in candidates:
            base = os.environ.get(env_name)
            if base:
                return os.path.join(base, suffix)

        # nothing configured: fall back to a throw-away directory
        scratch = tempfile.mkdtemp(prefix='.wmcore_cache_')
        self['deleteCacheOnExit'] = TempDirectory(scratch)
        return scratch

    def getDomainName(self):
        """Return the ``hostname`` attribute of the parsed endpoint URL."""
        components = self['endpoint_components']
        return components.hostname

    def getUserName(self):
        """Return the ``username`` attribute of the parsed endpoint URL."""
        components = self['endpoint_components']
        return components.username

    def _getURLOpener(self):
        """
        Create the ``httplib2.Http`` object used to talk to the endpoint.

        For ``https`` endpoints the key/cert pair from ``getKeyCert`` is
        attached when one is found; a failure to find one is only logged,
        because not every https service requires client authentication.
        Server certificate validation is switched off.
        """
        import httplib2

        key = cert = None
        if self['endpoint_components'].scheme == 'https':
            # Client credentials only make sense for https; they are optional
            try:
                key, cert = self.getKeyCert()
            except Exception as ex:
                self['logger'].info('No certificate or key found, authentication may fail')
                self['logger'].debug(str(ex))

        cache_path = self['req_cache_path']
        timeout = self['timeout']
        try:
            # validation is disabled: there is no single PEM holding all CAs
            opener = httplib2.Http(cache_path, timeout,
                                   disable_ssl_certificate_validation=True)
        except TypeError:
            # older httplib2 does not know the keyword and never validates
            opener = httplib2.Http(cache_path, timeout)

        # The domain has to be a bare host[:port]; self['host'] is a full URL,
        # so an empty domain is registered instead.
        if key or cert:
            opener.add_certificate(key=key, cert=cert, domain='')
        return opener

    def addBasicAuth(self, username, password):
        """
        Store an HTTP Basic ``Authorization`` header for later requests.

        The header value is ``"Basic " + base64("<username>:<password>")``
        and is written to ``self.additionalHeaders["Authorization"]``.

        ``base64.b64encode`` is used rather than ``encodestring``: the latter
        inserts a newline after every 76 output characters, which corrupts
        the header for long credentials, and it no longer exists in recent
        Python 3 releases.
        """
        credentials = '%s:%s' % (username, password)
        if not isinstance(credentials, bytes):
            # text (unicode) credentials must be bytes before encoding
            credentials = credentials.encode('utf-8')
        token = base64.b64encode(credentials)
        if not isinstance(token, str):
            # Python 3 returns bytes; header values are native strings
            token = token.decode('ascii')
        self.additionalHeaders["Authorization"] = "Basic %s" % token

    def getKeyCert(self):
        """
        _getKeyCert_

        Work out which key/certificate pair to use and return it as
        ``(key, cert)``. Candidates are tried in this order and the first
        match wins:

          0. non-empty ``self['key']`` / ``self['cert']``;
          1. ``X509_HOST_CERT`` / ``X509_HOST_KEY`` from the environment;
          2. ``X509_USER_PROXY`` when the file it names exists;
          3. ``X509_USER_CERT`` / ``X509_USER_KEY`` from the environment;
          4. the default proxy location ``/tmp/x509up_u<uid>``;
          5. when stdin is a terminal, ``$HOME/.globus/usercert.pem`` with
             ``userkey.pem`` (or the certificate itself) as key.

        Raises WMException when a pair was selected but one of the two
        files does not exist. ``(None, None)`` is returned when nothing
        was selected. This code was modified from DBSAPI/dbsHttpService.py
        """
        environ = os.environ
        key = cert = None

        if 'cert' in self and 'key' in self and self['cert'] and self['key']:
            # explicitly configured on this object: takes precedence
            key, cert = self['key'], self['cert']
        elif 'X509_HOST_CERT' in environ:
            # host certificate, this is how it is set in Tier0
            cert, key = environ['X509_HOST_CERT'], environ['X509_HOST_KEY']
        elif 'X509_USER_PROXY' in environ and os.path.exists(environ['X509_USER_PROXY']):
            # user proxy: one file serves as both key and certificate
            key = cert = environ['X509_USER_PROXY']
        elif 'X509_USER_CERT' in environ:
            cert, key = environ['X509_USER_CERT'], environ['X509_USER_KEY']
        elif os.path.exists('/tmp/x509up_u' + str(os.getuid())):
            # TODO: only in linux, unix case, add other os case
            key = cert = '/tmp/x509up_u' + str(os.getuid())
        elif sys.stdin.isatty():
            # interactive session: an encrypted certificate can be used
            globus_cert = environ['HOME'] + '/.globus/usercert.pem'
            if os.path.exists(globus_cert):
                cert = globus_cert
                globus_key = environ['HOME'] + '/.globus/userkey.pem'
                key = globus_key if os.path.exists(globus_key) else cert

        # A pair was chosen but at least one of the files is missing
        if key and cert and not (os.path.exists(cert) and os.path.exists(key)):
            raise WMException('Request requires a host certificate and key',
                              "WMCORE-11")

        # Existence is all that is checked; the proxy may still be invalid
        return key, cert

    def getCAPath(self):
        """
        _getCAPath_

        Return the location of the CA certificates, or None.

        ``self['capath']`` is returned whenever that key is present (even
        when its value is None); otherwise the ``X509_CERT_DIR`` environment
        variable is used when set. The upload/download helpers in this
        class skip peer verification when this returns a false value.
        """
        if 'capath' in self:
            return self['capath']
        return os.environ.get("X509_CERT_DIR")

    def uploadFile(self, fileName, url, fieldName='file1', params=[], verb='POST'):
        """
        Upload a file with curl streaming it directly from disk.

        fileName:  path of the file to send
        url:       destination URL
        fieldName: name of the multipart form field carrying the file
        params:    extra form fields, appended after the file field
                   (the list is only read, never modified)
        verb:      'POST' or 'PUT'

        Returns the response body. Raises HTTPException when ``verb`` is not
        supported or when the response status is outside the 2xx range; in
        the latter case the exception carries ``req_data``, ``url``,
        ``result``, ``status``, ``reason`` and ``headers`` attributes.
        """
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        import pycurl
        bbuf = StringIO.StringIO()
        hbuf = StringIO.StringIO()
        c = pycurl.Curl()
        # The handle is released in ``finally`` so that an unsupported verb,
        # a failing setopt or a failing transfer does not leak it.
        try:
            if verb == 'POST':
                c.setopt(c.POST, 1)
            elif verb == 'PUT':
                c.setopt(pycurl.CUSTOMREQUEST, 'PUT')
            else:
                raise HTTPException("Verb %s not sopported for upload." % verb)
            c.setopt(c.URL, url)
            fullParams = [(fieldName, (c.FORM_FILE, fileName))]
            fullParams.extend(params)
            c.setopt(c.HTTPPOST, fullParams)
            c.setopt(pycurl.WRITEFUNCTION, bbuf.write)
            c.setopt(pycurl.HEADERFUNCTION, hbuf.write)
            if capath:
                c.setopt(pycurl.CAPATH, capath)
                c.setopt(pycurl.SSL_VERIFYPEER, True)
            else:
                # no CA directory known: the server identity is not verified
                c.setopt(pycurl.SSL_VERIFYPEER, False)
            if ckey:
                c.setopt(pycurl.SSLKEY, ckey)
            if cert:
                c.setopt(pycurl.SSLCERT, cert)
            c.perform()
        finally:
            c.close()

        hres = hbuf.getvalue()
        bres = bbuf.getvalue()
        rh = ResponseHeader(hres)
        if rh.status < 200 or rh.status >= 300:
            exc = HTTPException(bres)
            setattr(exc, 'req_data', fullParams)
            setattr(exc, 'url', url)
            setattr(exc, 'result', bres)
            setattr(exc, 'status', rh.status)
            setattr(exc, 'reason', rh.reason)
            setattr(exc, 'headers', rh.header)
            raise exc

        return bres

    def downloadFile(self, fileName, url):
        """
        Download a file with curl streaming it directly to disk.

        fileName: path the response body is written to (opened in "wb" mode)
        url:      URL to fetch; redirects are followed

        Returns ``(fileName, header)`` where ``header`` is the
        ``ResponseHeader`` built from the received header block. Raises
        RuntimeError when the response status is outside the 2xx range.
        """
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        import pycurl
        from WMCore.Services.pycurl_manager import ResponseHeader

        hbuf = StringIO.StringIO()

        with open(fileName, "wb") as fp:
            curl = pycurl.Curl()
            # Release the handle even when a setopt or the transfer fails
            try:
                curl.setopt(pycurl.URL, url)
                curl.setopt(pycurl.WRITEDATA, fp)
                curl.setopt(pycurl.HEADERFUNCTION, hbuf.write)
                if capath:
                    curl.setopt(pycurl.CAPATH, capath)
                    curl.setopt(pycurl.SSL_VERIFYPEER, True)
                else:
                    # no CA directory known: the server identity is not verified
                    curl.setopt(pycurl.SSL_VERIFYPEER, False)
                if ckey:
                    curl.setopt(pycurl.SSLKEY, ckey)
                if cert:
                    curl.setopt(pycurl.SSLCERT, cert)
                curl.setopt(pycurl.FOLLOWLOCATION, 1)
                curl.perform()
            finally:
                curl.close()

            header = ResponseHeader(hbuf.getvalue())
            if header.status < 200 or header.status >= 300:
                raise RuntimeError('Reading %s failed with code %s' % (url, header.status))
        return fileName, header
コード例 #7
0
ファイル: pycurl_manager_t.py プロジェクト: vkuznet/WMCore
class PyCurlManager(unittest.TestCase):
    """Test pycurl_manager module"""

    def setUp(self):
        """Create the request handler, credential paths and canned headers."""
        self.mgr = RequestHandler()
        self.ckey = os.path.join(os.environ['HOME'], '.globus/userkey.pem')
        self.cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem')

        # Header blocks (without status line) used by the parsing tests below
        self.cricheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: sessionid=bc1xu8zi5rbbsd5fgjuklb2tk2r3f6tw; expires=Sun, 11-Nov-2018 14:50:29 GMT; httponly; Max-Age=432000; Path=/\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n'
        self.dbsheader = 'Date: Tue, 06 Nov 2018 14:39:07 GMT\r\nServer: Apache\r\nCMS-Server-Time: D=1503 t=1541515147806112\r\nTransfer-Encoding: chunked\r\nContent-Type: text/html\r\n\r\n'
        self.HTTPheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: GRIDHTTP_PASSCODE=2c6da9c96efa2ad0farhda; domain=cms-cric.cern.ch; path=/; secure\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n'

    def testMulti(self):
        """
        Test fetch of several urls at once, one of the url relies on CERN SSO.
        """
        url1 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/help"
        url2 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/datatiers"
        url3 = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        urls = [url1, url2, url3]
        # The cookie file must outlive the requests, so they run inside the
        # ``with`` block; the file is closed (and removed) afterwards.
        with tempfile.NamedTemporaryFile() as tfile:
            cern_sso_cookie(url3, tfile.name, self.cert, self.ckey)
            cookie = {url3: tfile.name}
            data = getdata(urls, self.ckey, self.cert, cookie=cookie)
            headers = 0
            for row in data:
                if '200 OK' in row['headers']:
                    headers += 1
        # assertTrue(headers, 3) would treat 3 as the failure message and
        # pass for any non-zero count; every url has to answer 200 OK.
        self.assertEqual(headers, 3)

    def testSingle(self):
        """
        Test single call to CERN SSO url.
        """
        # test RequestHandler
        url = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        params = {}
        with tempfile.NamedTemporaryFile() as tfile:
            cern_sso_cookie(url, tfile.name, self.cert, self.ckey)
            cookie = {url: tfile.name}
            header, _ = self.mgr.request(url, params, cookie=cookie)
        # assertTrue(status, 200) passes for any truthy status
        self.assertEqual(header.status, 200)

    def testContinue(self):
        """
        Test HTTP exit code 100 - Continue
        """
        header = "HTTP/1.1 100 Continue\r\n" + self.dbsheader

        resp = ResponseHeader(header)
        # A 100 status line is expected to leave status unset and reason empty
        self.assertIsNone(getattr(resp, "status", None))
        self.assertEqual(resp.reason, "")
        self.assertFalse(resp.fromcache)
        self.assertIn("CMS-Server-Time", resp.header)
        self.assertIn("Date", resp.header)
        self.assertEqual(resp.header['Content-Type'], 'text/html')
        self.assertEqual(resp.header['Server'], 'Apache')
        self.assertEqual(resp.header['Transfer-Encoding'], 'chunked')

    def testOK(self):
        """
        Test HTTP exit code 200 - OK
        """
        header = "HTTP/1.1 200 OK\r\n" + self.dbsheader

        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 200)
        self.assertEqual(resp.reason, "OK")
        self.assertFalse(resp.fromcache)

    def testForbidden(self):
        """
        Test HTTP exit code 403 - Forbidden
        """
        header = "HTTP/1.1 403 Forbidden\r\n" + self.dbsheader

        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 403)
        self.assertEqual(resp.reason, "Forbidden")
        self.assertFalse(resp.fromcache)

    def testOKCRIC(self):
        """
        Test HTTP exit code 200 - OK for a CRIC response header
        """
        header = "HTTP/1.1 200 OK\r\n" + self.cricheader

        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 200)
        self.assertEqual(resp.reason, "OK")
        self.assertFalse(resp.fromcache)
        self.assertIn("Content-Length", resp.header)
        self.assertIn("Date", resp.header)
        self.assertIn("Server", resp.header)
        self.assertIn("sessionid", resp.header['Set-Cookie'])
        self.assertEqual(resp.header['Content-Type'], 'application/json')
        self.assertEqual(resp.header['Vary'], 'Cookie')
        self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN')

    def testUnavailableCRICHTTP(self):
        """
        Test HTTP exit code 503 - Service Unavailable for a CRIC response header
        when it also contains a HTTP string in the Set-Cookie header section
        """
        header = "HTTP/1.1 503 Service Unavailable\r\n" + self.HTTPheader

        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 503)
        self.assertEqual(resp.reason, "Service Unavailable")
        self.assertFalse(resp.fromcache)
        self.assertIn("Content-Length", resp.header)
        self.assertIn("Date", resp.header)
        self.assertIn("Server", resp.header)
        self.assertIn("GRIDHTTP_PASSCODE", resp.header['Set-Cookie'])
        self.assertEqual(resp.header['Content-Type'], 'application/json')
        self.assertEqual(resp.header['Vary'], 'Cookie')
        self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN')
コード例 #8
0
class PyCurlManager(unittest.TestCase):
    """Test pycurl_manager module"""
    def setUp(self):
        """Create the request handler, credentials and canned headers."""
        self.mgr = RequestHandler()
        # Look the credentials up once; index 0 is the key, index 1 the cert
        keyCert = getKeyCertFromEnv()
        self.ckey = keyCert[0]
        self.cert = keyCert[1]

        # Header blocks (without status line) used by the parsing tests below
        self.cricheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: sessionid=bc1xu8zi5rbbsd5fgjuklb2tk2r3f6tw; expires=Sun, 11-Nov-2018 14:50:29 GMT; httponly; Max-Age=432000; Path=/\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n'
        self.dbsheader = 'Date: Tue, 06 Nov 2018 14:39:07 GMT\r\nServer: Apache\r\nCMS-Server-Time: D=1503 t=1541515147806112\r\nTransfer-Encoding: chunked\r\nContent-Type: text/html\r\n\r\n'
        self.HTTPheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: GRIDHTTP_PASSCODE=2c6da9c96efa2ad0farhda; domain=cms-cric.cern.ch; path=/; secure\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n'

    def testMulti(self):
        """
        Test fetch of several urls at once, one of the url relies on CERN SSO.
        """
        url1 = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader/help"
        url2 = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader/datatiers"
        url3 = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        urls = [url1, url2, url3]
        # The cookie file must outlive the requests, so they run inside the
        # ``with`` block; the file is closed (and removed) afterwards.
        with tempfile.NamedTemporaryFile() as tfile:
            cern_sso_cookie(url3, tfile.name, self.cert, self.ckey)
            cookie = {url3: tfile.name}
            data = getdata(urls, self.ckey, self.cert, cookie=cookie)
            headers = 0
            for row in data:
                if '200 OK' in row['headers']:
                    headers += 1
        # assertTrue(headers, 3) would treat 3 as the failure message and
        # pass for any non-zero count; every url has to answer 200 OK.
        self.assertEqual(headers, 3)

    def testSingle(self):
        """
        Test single call to CERN SSO url.
        """
        # test RequestHandler
        url = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        params = {}
        headers = {"Cache-Control": "no-cache"}
        with tempfile.NamedTemporaryFile() as tfile:
            cern_sso_cookie(url, tfile.name, self.cert, self.ckey)
            cookie = {url: tfile.name}
            header, _ = self.mgr.request(url, params, headers, cookie=cookie)
        # assertTrue(status, 200) passes for any truthy status
        self.assertEqual(header.status, 200)

    def testContinue(self):
        """
        Test HTTP exit code 100 - Continue
        """
        header = "HTTP/1.1 100 Continue\r\n" + self.dbsheader

        resp = ResponseHeader(header)
        # A 100 status line is expected to leave status unset and reason empty
        self.assertIsNone(getattr(resp, "status", None))
        self.assertEqual(resp.reason, "")
        self.assertFalse(resp.fromcache)
        self.assertIn("CMS-Server-Time", resp.header)
        self.assertIn("Date", resp.header)
        self.assertEqual(resp.header['Content-Type'], 'text/html')
        self.assertEqual(resp.header['Server'], 'Apache')
        self.assertEqual(resp.header['Transfer-Encoding'], 'chunked')

    def testOK(self):
        """
        Test HTTP exit code 200 - OK
        """
        header = "HTTP/1.1 200 OK\r\n" + self.dbsheader

        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 200)
        self.assertEqual(resp.reason, "OK")
        self.assertFalse(resp.fromcache)

    def testForbidden(self):
        """
        Test HTTP exit code 403 - Forbidden
        """
        header = "HTTP/1.1 403 Forbidden\r\n" + self.dbsheader

        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 403)
        self.assertEqual(resp.reason, "Forbidden")
        self.assertFalse(resp.fromcache)

    def testOKCRIC(self):
        """
        Test HTTP exit code 200 - OK for a CRIC response header
        """
        header = "HTTP/1.1 200 OK\r\n" + self.cricheader

        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 200)
        self.assertEqual(resp.reason, "OK")
        self.assertFalse(resp.fromcache)
        self.assertIn("Content-Length", resp.header)
        self.assertIn("Date", resp.header)
        self.assertIn("Server", resp.header)
        self.assertIn("sessionid", resp.header['Set-Cookie'])
        self.assertEqual(resp.header['Content-Type'], 'application/json')
        self.assertEqual(resp.header['Vary'], 'Cookie')
        self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN')

    def testUnavailableCRICHTTP(self):
        """
        Test HTTP exit code 503 - Service Unavailable for a CRIC response header
        when it also contains a HTTP string in the Set-Cookie header section
        """
        header = "HTTP/1.1 503 Service Unavailable\r\n" + self.HTTPheader

        resp = ResponseHeader(header)
        self.assertEqual(resp.status, 503)
        self.assertEqual(resp.reason, "Service Unavailable")
        self.assertFalse(resp.fromcache)
        self.assertIn("Content-Length", resp.header)
        self.assertIn("Date", resp.header)
        self.assertIn("Server", resp.header)
        self.assertIn("GRIDHTTP_PASSCODE", resp.header['Set-Cookie'])
        self.assertEqual(resp.header['Content-Type'], 'application/json')
        self.assertEqual(resp.header['Vary'], 'Cookie')
        self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN')

    def testHeadRequest(self):
        """
        Test a HEAD request.
        """
        params = {}
        headers = {}
        url = 'https://cmsweb.cern.ch/reqmgr2/data/info'
        res = self.mgr.getheader(url,
                                 params=params,
                                 headers=headers,
                                 ckey=self.ckey,
                                 cert=self.cert)
        self.assertEqual(res.getReason(), "OK")
        self.assertTrue(len(res.getHeader()) > 10)
        # Kubernetes cluster responds with a different Server header
        serverHeader = res.getHeaderKey("Server")
        self.assertTrue(
            serverHeader.startswith(("nginx/", "CherryPy/", "openresty/")))
コード例 #9
0
    def command(self, jobs, jobs_lfn, jobs_pfn, jobs_report):
        """
        Submit one FTS REST transfer job per link and record the result.

        For each ``link -> copyjob`` entry of ``jobs``:
          * validate the copyjob (invalid ones are skipped silently);
          * POST a REST job description to ``<fts server>/jobs``;
          * read the job id from the acknowledgment and fetch the ids of
            the files in that job from ``<fts server>/jobs/<id>/files``;
          * on any failure mark the link's LFNs as failed via
            ``self.mark_failed`` and move on to the next link;
          * otherwise write ``Monitor.<FTSJobid>.json`` into
            ``self.dropbox_dir`` and one ``/tmp/Dashboard.<hash>.json``
            report per LFN.

        jobs:        mapping link -> iterable of "source destination" strings
        jobs_lfn:    mapping link -> list of LFNs
        jobs_pfn:    mapping link -> list of PFNs (parallel to the LFNs)
        jobs_report: mapping link -> per-file sequences; index 0, 1 and 2 are
                     used as job number, job version and workflow

        Returns None.
        """
        # Output: {"userProxyPath":"/path/to/proxy","LFNs":["lfn1","lfn2","lfn3"],"PFNs":["pfn1","pfn2","pfn3"],"FTSJobid":'id-of-fts-job', "username": '******'}
        # NOTE: failure reasons accumulate over all links handled by this call
        failure_reasons = []
        # Loop through all the jobs for the links we have
        for link, copyjob in jobs.items():
            submission_error = False
            fts_job = {}
            # Reset per link so a value from a previous link can never leak
            # into this one when the submission yields no job id.
            job_id = None
            fileId_list = []
            datares = None

            # Validate copyjob file before doing anything
            is_valid = self.validate_copyjob(copyjob)
            self.logger.debug("Valid %s" % is_valid)
            if not is_valid:
                continue

            rest_copyjob = {
                "params": {
                    "bring_online": None,
                    "verify_checksum": False,
                    "copy_pin_lifetime": -1,
                    "max_time_in_queue": self.config.max_h_in_queue,
                    "job_metadata": {"issuer": "ASO"},
                    "spacetoken": None,
                    "source_spacetoken": None,
                    "fail_nearline": False,
                    "overwrite": True,
                    "gridftp": None
                },
                "files": []
            }

            # Each copyjob entry is "<source> <destination>"
            for SrcDest in copyjob:
                endpoints = SrcDest.split(" ")
                rest_copyjob["files"].append({"sources": [endpoints[0]],
                                              "metadata": None,
                                              "destinations": [endpoints[1]]})

            self.logger.debug("Subbmitting this REST copyjob %s" % rest_copyjob)
            url = self.fts_server_for_transfer + '/jobs'
            self.logger.debug("Running FTS submission command")
            self.logger.debug("FTS server: %s" % self.fts_server_for_transfer)
            self.logger.debug("link: %s -> %s" % link)
            # NOTE(review): the header name carries a trailing space; kept
            # as-is because it is sent at runtime - confirm it is intended.
            heade = {"Content-Type ":"application/json"}

            connection = None
            try:
                connection = RequestHandler(config={'timeout': 300, 'connecttimeout' : 300})
            except Exception as ex:
                # Without a handler nothing can be submitted for this link
                msg = str(ex)
                msg += str(traceback.format_exc())
                self.logger.debug(msg)
                failure_reasons.append(msg)
                submission_error = True

            if not submission_error:
                try:
                    response, datares = connection.request(url, rest_copyjob, heade, verb='POST', doseq=True, ckey=self.user_proxy, \
                                                           cert=self.user_proxy, capath='/etc/grid-security/certificates', \
                                                           cainfo=self.user_proxy, verbose=True)
                    self.logger.debug("Submission done")
                    self.logger.debug('Submission header status: %s' % response.status)
                    self.logger.debug('Submission header reason: %s' % response.reason)
                    self.logger.debug('Submission result %s' %  datares)
                except Exception as ex:
                    msg = "Error submitting to FTS: %s " % url
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.debug(msg)
                    failure_reasons.append(msg)
                    submission_error = True

            if not submission_error:
                res = {}
                try:
                    res = json.loads(datares)
                except Exception as ex:
                    msg = "Couldn't load submission acknowledgment from FTS"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.debug(msg)
                    submission_error = True
                    failure_reasons.append(msg)
                if not submission_error and 'job_id' not in res:
                    # An acknowledgment without a job id cannot be monitored
                    msg = "No job_id in submission acknowledgment from FTS: %s" % url
                    self.logger.debug(msg)
                    submission_error = True
                    failure_reasons.append(msg)
                if not submission_error:
                    files_res = []
                    files_ = {}
                    job_id = res['job_id']
                    file_url = self.fts_server_for_transfer + '/jobs/' + job_id +'/files'
                    self.logger.debug("Submitting to %s" % file_url)
                    try:
                        response, files_ = connection.request(file_url, {}, heade, doseq=True, ckey=self.user_proxy, \
                                                              cert=self.user_proxy, capath='/etc/grid-security/certificates', \
                                                              cainfo=self.user_proxy, verbose=True)
                        files_res = json.loads(files_)
                    except Exception as ex:
                        msg = "Error contacting FTS to retrieve file: %s " % file_url
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.debug(msg)
                        submission_error = True
                        failure_reasons.append(msg)
                    self.logger.debug("List files in job %s" % files_)
                    for file_in_job in files_res:
                        if 'file_id' in file_in_job:
                            fileId_list.append(file_in_job['file_id'])
                        else:
                            msg = "Could not load submitted file %s from FTS" % file_url
                            self.logger.debug(msg)
                            submission_error = True
                            failure_reasons.append(msg)
                    self.logger.debug("File id list %s" % fileId_list)

            if submission_error:
                self.logger.debug("Submission failed")
                self.logger.info("Mark failed %s files" % len(jobs_lfn[link]))
                self.logger.debug("Mark failed %s files" % jobs_lfn[link])
                failed_files = self.mark_failed(jobs_lfn[link], force_fail=False, submission_error=True, failure_reasons=failure_reasons)
                self.logger.info("Marked failed %s" % len(failed_files))
                continue

            fts_job['userProxyPath'] = self.user_proxy
            fts_job['LFNs'] = jobs_lfn[link]
            fts_job['PFNs'] = jobs_pfn[link]
            fts_job['FTSJobid'] = job_id
            fts_job['files_id'] = fileId_list
            fts_job['username'] = self.user
            self.logger.debug("Creating json file %s in %s" % (fts_job, self.dropbox_dir))
            with open('%s/Monitor.%s.json' % (self.dropbox_dir, fts_job['FTSJobid']), 'w') as ftsjob_file:
                ftsjob_file.write(json.dumps(fts_job))
            self.logger.debug("%s ready." % fts_job)

            # Prepare Dashboard report; enumerate keeps the position right
            # even when the same LFN appears more than once.
            for index, lfn in enumerate(fts_job['LFNs']):
                report_row = jobs_report[link][index]
                lfn_report = {}
                lfn_report['FTSJobid'] = fts_job['FTSJobid']
                lfn_report['PFN'] = fts_job['PFNs'][index]
                lfn_report['FTSFileid'] = fts_job['files_id'][index]
                lfn_report['Workflow'] = report_row[2]
                lfn_report['JobVersion'] = report_row[1]
                # Kept in its own name so the FTS job id is not overwritten
                dashboard_job_id = '%d_https://glidein.cern.ch/%d/%s_%s' % (int(report_row[0]), int(report_row[0]), lfn_report['Workflow'].replace("_", ":"), lfn_report['JobVersion'])
                lfn_report['JobId'] = dashboard_job_id
                lfn_report['URL'] = self.fts_server_for_transfer
                self.logger.debug("Creating json file %s in %s for FTS3 Dashboard" % (lfn_report, self.dropbox_dir))
                with open('/tmp/Dashboard.%s.json' % getHashLfn(lfn_report['PFN']), 'w') as dash_job_file:
                    dash_job_file.write(json.dumps(lfn_report))
                self.logger.debug("%s ready for FTS Dashboard report." % lfn_report)
        return
コード例 #10
0
ファイル: Requests.py プロジェクト: samircury/WMCore
class Requests(dict):
    """
    Generic class for sending different types of HTTP Request to a given URL
    """
    def __init__(self, url='http://localhost', idict=None):
        """
        Populate defaults, basic-auth, cache location and the connection.

        url should really be host - TODO fix that when have sufficient code
        coverage and change _getURLOpener if needed

        :param url: endpoint URL. It becomes the "host" entry unless idict
            already carries a "host" key. If sanitizeURL reports a username,
            the credentials are passed to addBasicAuth and the sanitized URL
            is used instead.
        :param idict: optional dict of settings copied into this object. The
            keys read explicitly here are 'pycurl', 'capath', 'cachepath' and
            'service_name'.
        """
        if not idict:
            idict = {}
        dict.__init__(self, idict)
        self.pycurl = idict.get('pycurl', None)
        self.capath = idict.get('capath', None)
        # The pycurl request manager is only created when pycurl is requested
        if self.pycurl:
            self.reqmgr = RequestHandler()

        # set up defaults (setdefault never overrides values from idict)
        self.setdefault("accept_type", 'text/html')
        self.setdefault("content_type", 'application/x-www-form-urlencoded')
        self.additionalHeaders = {}

        # check for basic auth early, as if found this changes the url
        urlComponent = sanitizeURL(url)
        if urlComponent['username'] is not None:
            self.addBasicAuth(\
                urlComponent['username'], urlComponent['password'])
            url = urlComponent['url']  # remove user, password from url

        self.setdefault("host", url)

        # then update with the incoming dict
        self.update(idict)

        # Parsed once here; read by cachePath, getDomainName, getUserName
        # and _getURLOpener.
        self['endpoint_components'] = urlparse.urlparse(self['host'])

        # If cachepath = None disable caching
        if 'cachepath' in idict and idict['cachepath'] is None:
            self["req_cache_path"] = None
        else:
            cache_dir = (self.cachePath(idict.get('cachepath'), \
                        idict.get('service_name')))
            self["cachepath"] = cache_dir
            self["req_cache_path"] = os.path.join(cache_dir, '.cache')
        self.setdefault("timeout", 300)
        # Default "logger" is the logging module itself
        self.setdefault("logger", logging)

        check_server_url(self['host'])
        # and then get the URL opener
        self.setdefault("conn", self._getURLOpener())

    def get(self,
            uri=None,
            data={},
            incoming_headers={},
            encode=True,
            decode=True,
            contentType=None):
        """
        GET some data
        """
        return self.makeRequest(uri, data, 'GET', incoming_headers, encode,
                                decode, contentType)

    def post(self,
             uri=None,
             data={},
             incoming_headers={},
             encode=True,
             decode=True,
             contentType=None):
        """
        POST some data
        """
        return self.makeRequest(uri, data, 'POST', incoming_headers, encode,
                                decode, contentType)

    def put(self,
            uri=None,
            data={},
            incoming_headers={},
            encode=True,
            decode=True,
            contentType=None):
        """
        PUT some data
        """
        return self.makeRequest(uri, data, 'PUT', incoming_headers, encode,
                                decode, contentType)

    def delete(self,
               uri=None,
               data={},
               incoming_headers={},
               encode=True,
               decode=True,
               contentType=None):
        """
        DELETE some data
        """
        return self.makeRequest(uri, data, 'DELETE', incoming_headers, encode,
                                decode, contentType)

    def makeRequest(self,
                    uri=None,
                    data={},
                    verb='GET',
                    incoming_headers={},
                    encoder=True,
                    decoder=True,
                    contentType=None):
        """
        Wrapper around request helper functions.
        """
        if self.pycurl:
            result = self.makeRequest_pycurl(uri, data, verb, incoming_headers,
                                             encoder, decoder, contentType)
        else:
            result = self.makeRequest_httplib(uri, data, verb,
                                              incoming_headers, encoder,
                                              decoder, contentType)
        return result

    def makeRequest_pycurl(self,
                           uri=None,
                           params={},
                           verb='GET',
                           incoming_headers={},
                           encoder=True,
                           decoder=True,
                           contentType=None):
        """
        Make HTTP(s) request via pycurl library. Stay complaint with
        makeRequest_httplib method.
        """
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        if not contentType:
            contentType = self['content_type']
        headers = {
            "Content-type": contentType,
            "User-agent": "WMCore.Services.Requests/v001",
            "Accept": self['accept_type']
        }
        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]
        #And now overwrite any headers that have been passed into the call:
        headers.update(incoming_headers)
        url = self['host'] + uri
        response, data = self.reqmgr.request(url, params, headers, \
                    verb=verb, ckey=ckey, cert=cert, capath=capath, decode=decoder)
        return data, response.status, response.reason, response.fromcache

    def makeRequest_httplib(self,
                            uri=None,
                            data={},
                            verb='GET',
                            incoming_headers={},
                            encoder=True,
                            decoder=True,
                            contentType=None):
        """
        Make a request to the remote database. for a give URI. The type of
        request will determine the action take by the server (be careful with
        DELETE!). Data should be a dictionary of {dataname: datavalue}.

        Returns a tuple of the data from the server, decoded using the
        appropriate method the response status and the response reason, to be
        used in error handling.

        You can override the method to encode/decode your data by passing in an
        encoding/decoding function to this method. Your encoded data must end up
        as a string.

        """
        #TODO: User agent should be:
        # $client/$client_version (CMS)
        # $http_lib/$http_lib_version $os/$os_version ($arch)
        if not contentType:
            contentType = self['content_type']
        headers = {
            "Content-type": contentType,
            "User-agent": "WMCore.Services.Requests/v001",
            "Accept": self['accept_type']
        }
        encoded_data = ''

        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]

        #And now overwrite any headers that have been passed into the call:
        #WARNING: doesn't work with deplate so only accept gzip
        incoming_headers["accept-encoding"] = "gzip,identity"
        headers.update(incoming_headers)

        # httpib2 requires absolute url
        uri = self['host'] + uri

        # If you're posting an attachment, the data might not be a dict
        #   please test against ConfigCache_t if you're unsure.
        #assert type(data) == type({}), \
        #        "makeRequest input data must be a dict (key/value pairs)"

        # There must be a better way to do this...
        def f():
            """Dummy function"""
            pass

        if verb != 'GET' and data:
            if type(encoder) == type(self.get) or type(encoder) == type(f):
                encoded_data = encoder(data)
            elif encoder == False:
                # Don't encode the data more than we have to
                #  we don't want to URL encode the data blindly,
                #  that breaks POSTing attachments... ConfigCache_t
                #encoded_data = urllib.urlencode(data)
                #  -- Andrew Melo 25/7/09
                encoded_data = data
            else:
                # Either the encoder is set to True or it's junk, so use
                # self.encode
                encoded_data = self.encode(data)
            headers["Content-length"] = len(encoded_data)
        elif verb == 'GET' and data:
            #encode the data as a get string
            uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True))

        headers["Content-length"] = str(len(encoded_data))

        assert type(encoded_data) == type('string'), \
            "Data in makeRequest is %s and not encoded to a string" \
                % type(encoded_data)

        # httplib2 will allow sockets to close on remote end without retrying
        # try to send request - if this fails try again - should then succeed
        try:
            response, result = self['conn'].request(uri,
                                                    method=verb,
                                                    body=encoded_data,
                                                    headers=headers)
            if response.status == 408:  # timeout can indicate a socket error
                response, result = self['conn'].request(uri,
                                                        method=verb,
                                                        body=encoded_data,
                                                        headers=headers)
        except (socket.error, AttributeError):
            # AttributeError implies initial connection error - need to close
            # & retry. httplib2 doesn't clear httplib state before next request
            # if this is threaded this may spoil things
            # only have one endpoint so don't need to determine which to shut
            [conn.close() for conn in self['conn'].connections.values()]
            self['conn'] = self._getURLOpener()
            # ... try again... if this fails propagate error to client
            try:
                response, result = self['conn'].request(uri,
                                                        method=verb,
                                                        body=encoded_data,
                                                        headers=headers)
            except AttributeError:
                # socket/httplib really screwed up - nuclear option
                self['conn'].connections = {}
                raise socket.error, 'Error contacting: %s' \
                        % self.getDomainName()
        if response.status >= 400:
            e = HTTPException()
            setattr(e, 'req_data', encoded_data)
            setattr(e, 'req_headers', headers)
            setattr(e, 'url', uri)
            setattr(e, 'result', result)
            setattr(e, 'status', response.status)
            setattr(e, 'reason', response.reason)
            setattr(e, 'headers', response)
            raise e

        if type(decoder) == type(self.makeRequest) or type(decoder) == type(f):
            result = decoder(result)
        elif decoder != False:
            result = self.decode(result)
        #TODO: maybe just return result and response...
        return result, response.status, response.reason, response.fromcache

    def encode(self, data):
        """
        encode data into some appropriate format, for now make it a string...
        """
        return urllib.urlencode(data, doseq=1)

    def decode(self, data):
        """
        decode data to some appropriate format, for now make it a string...
        """
        return data.__str__()

    def cachePath(self, given_path, service_name):
        """Return cache location"""
        if not service_name:
            service_name = 'REQUESTS'
        top = self.cacheTopPath(given_path, service_name)

        # deal with multiple Services that have the same service running and
        # with multiple users for a given Service
        if self.getUserName() is None:
            cachepath = os.path.join(top, self['endpoint_components'].netloc)
        else:
            cachepath = os.path.join(top, '%s-%s' \
                % (self.getUserName(), self.getDomainName()))

        try:
            # only we should be able to write to this dir
            os.makedirs(cachepath, stat.S_IRWXU)
        except OSError:
            if not os.path.isdir(cachepath):
                raise
            Permissions.owner_readwriteexec(cachepath)

        return cachepath

    def cacheTopPath(self, given_path, service_name):
        """Where to cache results?

        Logic:
          o If passed in take that
          o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined?
          o Is WMCORE_CACHE_DIR set
          o Generate a temporary directory
          """
        if given_path:
            return given_path
        user = str(os.getuid())
        # append user id so users don't clobber each other
        lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower())
        for var in ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR'):
            if os.environ.get(var):
                firstbit = os.environ[var]
                break
        else:
            idir = tempfile.mkdtemp(prefix='.wmcore_cache_')
            # object to store temporary directory - cleaned up on destruction
            self['deleteCacheOnExit'] = TempDirectory(idir)
            return idir

        return os.path.join(firstbit, lastbit)

    def getDomainName(self):
        """Parse netloc info to get hostname"""
        return self['endpoint_components'].hostname

    def getUserName(self):
        """Parse netloc to get user"""
        return self['endpoint_components'].username

    def _getURLOpener(self):
        """
        Build the httplib2.Http object used to talk to the endpoint.

        For https endpoints a key/cert pair is looked up via getKeyCert; a
        failure there is logged and otherwise ignored. The Http object is
        created with self['req_cache_path'] and self['timeout'].

        :return: the configured httplib2.Http instance.
        """
        key, cert = None, None
        if self['endpoint_components'].scheme == 'https':
            # only add certs to https requests
            # if we have a key/cert add to request,
            # if not proceed as not all https connections require them
            try:
                key, cert = self.getKeyCert()
            except Exception as ex:
                # Deliberately best-effort: log and carry on without certs
                msg = 'No certificate or key found, authentication may fail'
                self['logger'].info(msg)
                self['logger'].debug(str(ex))

        try:
            # disable validation as we don't have a single PEM with all ca's
            # NOTE(review): this turns off server certificate verification
            http = httplib2.Http(self['req_cache_path'],
                                 self['timeout'],
                                 disable_ssl_certificate_validation=True)
        except TypeError:
            # old httplib2 versions disable validation by default
            http = httplib2.Http(self['req_cache_path'], self['timeout'])

        # Domain must be just a hostname and port. self[host] is a URL currently
        if key or cert:
            http.add_certificate(key=key, cert=cert, domain='')
        return http
コード例 #11
0
class Requests(dict):
    """
    Generic class for sending different types of HTTP Request to a given URL
    """

    def __init__(self, url = 'http://localhost', idict=None):
        """
        Populate defaults, basic-auth, cache location and the connection.

        url should really be host - TODO fix that when have sufficient code
        coverage and change _getURLOpener if needed

        :param url: endpoint URL. It becomes the "host" entry unless idict
            already carries a "host" key. If sanitizeURL reports a username,
            the credentials are passed to addBasicAuth and the sanitized URL
            is used instead.
        :param idict: optional dict of settings copied into this object. The
            keys read explicitly here are 'pycurl', 'cachepath' and
            'service_name'.
        """
        if  not idict:
            idict = {}
        dict.__init__(self, idict)
        self.pycurl = idict.get('pycurl', None)
        # The pycurl request manager is only created when pycurl is requested
        if self.pycurl:
            self.reqmgr = RequestHandler()

        # set up defaults (setdefault never overrides values from idict)
        self.setdefault("accept_type", 'text/html')
        self.setdefault("content_type", 'application/x-www-form-urlencoded')
        self.additionalHeaders = {}

        # check for basic auth early, as if found this changes the url
        urlComponent = sanitizeURL(url)
        if urlComponent['username'] is not None:
            self.addBasicAuth(\
                urlComponent['username'], urlComponent['password'])
            url = urlComponent['url'] # remove user, password from url

        self.setdefault("host", url)

        # then update with the incoming dict
        self.update(idict)

        # Parsed once here; read by cachePath, getDomainName, getUserName
        # and _getURLOpener.
        self['endpoint_components'] = urlparse.urlparse(self['host'])

        # If cachepath = None disable caching
        if 'cachepath' in idict and idict['cachepath'] is None:
            self["req_cache_path"] = None
        else:
            cache_dir = (self.cachePath(idict.get('cachepath'), \
                        idict.get('service_name')))
            self["cachepath"] = cache_dir
            self["req_cache_path"] = os.path.join(cache_dir, '.cache')
        self.setdefault("timeout", 30)
        # Default "logger" is the logging module itself
        self.setdefault("logger", logging)

        check_server_url(self['host'])
        # and then get the URL opener
        self.setdefault("conn", self._getURLOpener())


    def get(self, uri=None, data={}, incoming_headers={},
               encode = True, decode=True, contentType=None):
        """
        GET some data
        """
        return self.makeRequest(uri, data, 'GET', incoming_headers,
                                encode, decode, contentType)

    def post(self, uri=None, data={}, incoming_headers={},
               encode = True, decode=True, contentType=None):
        """
        POST some data
        """
        return self.makeRequest(uri, data, 'POST', incoming_headers,
                                encode, decode, contentType)

    def put(self, uri=None, data={}, incoming_headers={},
               encode = True, decode=True, contentType=None):
        """
        PUT some data
        """
        return self.makeRequest(uri, data, 'PUT', incoming_headers,
                                encode, decode, contentType)

    def delete(self, uri=None, data={}, incoming_headers={},
               encode = True, decode=True, contentType=None):
        """
        DELETE some data
        """
        return self.makeRequest(uri, data, 'DELETE', incoming_headers,
                                encode, decode, contentType)

    def makeRequest(self, uri=None, data={}, verb='GET', incoming_headers={},
                     encoder=True, decoder=True, contentType=None):
        """
        Wrapper around request helper functions.
        """
        if  self.pycurl:
            result = self.makeRequest_pycurl(uri, data, verb, incoming_headers,
                         encoder, decoder, contentType)
        else:
            result = self.makeRequest_httplib(uri, data, verb, incoming_headers,
                         encoder, decoder, contentType)
        return result

    def makeRequest_pycurl(self, uri=None, params={}, verb='GET',
            incoming_headers={}, encoder=True, decoder=True, contentType=None):
        """
        Make HTTP(s) request via pycurl library. Stay complaint with
        makeRequest_httplib method.
        """
        ckey, cert = self.getKeyCert()
        capath = self.getCAPath()
        if  not contentType:
            contentType = self['content_type']
        headers = {"Content-type": contentType,
               "User-agent": "WMCore.Services.Requests/v001",
               "Accept": self['accept_type']}
        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]
        #And now overwrite any headers that have been passed into the call:
        headers.update(incoming_headers)
        url = self['host'] + uri
        response, data = self.reqmgr.request(url, params, headers, \
                    verb=verb, ckey=ckey, cert=cert, capath=capath, decode=decoder)
        return data, response.status, response.reason, response.fromcache

    def makeRequest_httplib(self, uri=None, data={}, verb='GET',
            incoming_headers={}, encoder=True, decoder=True, contentType=None):
        """
        Make a request to the remote database. for a give URI. The type of
        request will determine the action take by the server (be careful with
        DELETE!). Data should be a dictionary of {dataname: datavalue}.

        Returns a tuple of the data from the server, decoded using the
        appropriate method the response status and the response reason, to be
        used in error handling.

        You can override the method to encode/decode your data by passing in an
        encoding/decoding function to this method. Your encoded data must end up
        as a string.

        """
        #TODO: User agent should be:
        # $client/$client_version (CMS)
        # $http_lib/$http_lib_version $os/$os_version ($arch)
        if  not contentType:
            contentType = self['content_type']
        headers = {"Content-type": contentType,
               "User-agent": "WMCore.Services.Requests/v001",
               "Accept": self['accept_type']}
        encoded_data = ''

        for key in self.additionalHeaders.keys():
            headers[key] = self.additionalHeaders[key]

        #And now overwrite any headers that have been passed into the call:
        headers.update(incoming_headers)

        # httpib2 requires absolute url
        uri = self['host'] + uri

        # If you're posting an attachment, the data might not be a dict
        #   please test against ConfigCache_t if you're unsure.
        #assert type(data) == type({}), \
        #        "makeRequest input data must be a dict (key/value pairs)"

        # There must be a better way to do this...
        def f():
            """Dummy function"""
            pass

        if verb != 'GET' and data:
            if type(encoder) == type(self.get) or type(encoder) == type(f):
                encoded_data = encoder(data)
            elif encoder == False:
                # Don't encode the data more than we have to
                #  we don't want to URL encode the data blindly,
                #  that breaks POSTing attachments... ConfigCache_t
                #encoded_data = urllib.urlencode(data)
                #  -- Andrew Melo 25/7/09
                encoded_data = data
            else:
                # Either the encoder is set to True or it's junk, so use
                # self.encode
                encoded_data = self.encode(data)
            headers["Content-length"] = len(encoded_data)
        elif verb == 'GET' and data:
            #encode the data as a get string
            uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True))

        headers["Content-length"] = str(len(encoded_data))

        assert type(encoded_data) == type('string'), \
            "Data in makeRequest is %s and not encoded to a string" \
                % type(encoded_data)

        # httplib2 will allow sockets to close on remote end without retrying
        # try to send request - if this fails try again - should then succeed
        try:
            response, result = self['conn'].request(uri, method = verb,
                                    body = encoded_data, headers = headers)
            if response.status == 408: # timeout can indicate a socket error
                response, result = self['conn'].request(uri, method = verb,
                                    body = encoded_data, headers = headers)
        except (socket.error, AttributeError):
            # AttributeError implies initial connection error - need to close
            # & retry. httplib2 doesn't clear httplib state before next request
            # if this is threaded this may spoil things
            # only have one endpoint so don't need to determine which to shut
            [conn.close() for conn in self['conn'].connections.values()]
            self['conn'] = self._getURLOpener()
            # ... try again... if this fails propagate error to client
            try:
                response, result = self['conn'].request(uri, method = verb,
                                    body = encoded_data, headers = headers)
            except AttributeError:
                # socket/httplib really screwed up - nuclear option
                self['conn'].connections = {}
                raise socket.error, 'Error contacting: %s' \
                        % self.getDomainName()
        if response.status >= 400:
            e = HTTPException()
            setattr(e, 'req_data', encoded_data)
            setattr(e, 'req_headers', headers)
            setattr(e, 'url', uri)
            setattr(e, 'result', result)
            setattr(e, 'status', response.status)
            setattr(e, 'reason', response.reason)
            setattr(e, 'headers', response)
            raise e

        if type(decoder) == type(self.makeRequest) or type(decoder) == type(f):
            result = decoder(result)
        elif decoder != False:
            result = self.decode(result)
        #TODO: maybe just return result and response...
        return result, response.status, response.reason, response.fromcache

    def encode(self, data):
        """
        encode data into some appropriate format, for now make it a string...
        """
        return urllib.urlencode(data, doseq=1)

    def decode(self, data):
        """
        decode data to some appropriate format, for now make it a string...
        """
        return data.__str__()

    def cachePath(self, given_path, service_name):
        """Return cache location"""
        if not service_name:
            service_name = 'REQUESTS'
        top = self.cacheTopPath(given_path, service_name)

        # deal with multiple Services that have the same service running and
        # with multiple users for a given Service
        if self.getUserName() is None:
            cachepath = os.path.join(top, self['endpoint_components'].netloc)
        else:
            cachepath = os.path.join(top, '%s-%s' \
                % (self.getUserName(), self.getDomainName()))

        try:
            # only we should be able to write to this dir
            os.makedirs(cachepath, stat.S_IRWXU)
        except OSError:
            if not os.path.isdir(cachepath):
                raise
            Permissions.owner_readwriteexec(cachepath)

        return cachepath

    def cacheTopPath(self, given_path, service_name):
        """Where to cache results?

        Logic:
          o If passed in take that
          o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined?
          o Is WMCORE_CACHE_DIR set
          o Generate a temporary directory
          """
        if given_path:
            return given_path
        user = str(os.getuid())
        # append user id so users don't clobber each other
        lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower())
        for var in ('%s_CACHE_DIR' % service_name.upper(),
                    'WMCORE_CACHE_DIR'):
            if os.environ.get(var):
                firstbit = os.environ[var]
                break
        else:
            idir = tempfile.mkdtemp(prefix='.wmcore_cache_')
            # object to store temporary directory - cleaned up on destruction
            self['deleteCacheOnExit'] = TempDirectory(idir)
            return idir

        return os.path.join(firstbit, lastbit)

    def getDomainName(self):
        """Parse netloc info to get hostname"""
        return self['endpoint_components'].hostname

    def getUserName(self):
        """Parse netloc to get user"""
        return self['endpoint_components'].username

    def _getURLOpener(self):
        """
        Build the httplib2.Http object used to talk to the endpoint.

        For https endpoints a key/cert pair is looked up via getKeyCert; a
        failure there is logged and otherwise ignored. The Http object is
        created with self['req_cache_path'] and self['timeout'].

        :return: the configured httplib2.Http instance.
        """
        key, cert = None, None
        if self['endpoint_components'].scheme == 'https':
            # only add certs to https requests
            # if we have a key/cert add to request,
            # if not proceed as not all https connections require them
            try:
                key, cert = self.getKeyCert()
            except Exception as ex:
                # Deliberately best-effort: log and carry on without certs
                msg = 'No certificate or key found, authentication may fail'
                self['logger'].info(msg)
                self['logger'].debug(str(ex))

        try:
            # disable validation as we don't have a single PEM with all ca's
            # NOTE(review): this turns off server certificate verification
            http = httplib2.Http(self['req_cache_path'], self['timeout'],
                                 disable_ssl_certificate_validation=True)
        except TypeError:
            # old httplib2 versions disable validation by default
            http = httplib2.Http(self['req_cache_path'], self['timeout'])

        # Domain must be just a hostname and port. self[host] is a URL currently
        if key or cert:
            http.add_certificate(key=key, cert=cert, domain='')
        return http
コード例 #12
0
ファイル: daemon.py プロジェクト: vlimant/CRABServer
class Worker(object):
    """

    """
    def __init__(self, config, quiet):
        """
        Initialise class members, set up logging and create the connections.

        :arg config: configuration object; only its ``General`` section is used
        :arg bool quiet: forwarded to setRootLogger. Because ``debug`` is passed
                         as True there, the resulting level is DEBUG either way.
        """
        # Function-scope import: errno is only needed by createLogdir below.
        import errno

        self.config = config.General
        self.max_files_per_block = self.config.max_files_per_block
        self.userProxy = self.config.opsProxy
        self.block_publication_timeout = self.config.block_closure_timeout
        self.lfn_map = {}
        self.force_publication = False
        self.force_failure = False
        # Defined up front so the attribute exists even when the
        # RequestHandler at the end of this method cannot be created.
        self.connection = None
        #TODO: logger!
        def createLogdir(dirname):
            """ Create the directory dirname ignoring errors in case it exists. Exit if
                the directory cannot be created.
            """
            try:
                os.mkdir(dirname)
            except OSError as ose:
                if ose.errno != errno.EEXIST: #ignore the "Directory already exists error"
                    print(str(ose))
                    print("The task worker need to access the '%s' directory" % dirname)
                    sys.exit(1)


        def setRootLogger(quiet, debug):
            """Sets the root logger with the desired verbosity level
               The root logger logs to logs/log.txt and every single
               logging instruction is propagated to it (not really nice
               to read)

            :arg bool quiet: it tells if a quiet logger is needed
            :arg bool debug: it tells if needs a verbose logger
            :return logger: a logger with the appropriate logger level."""

            createLogdir('logs')
            createLogdir('logs/processes')
            createLogdir('logs/tasks')

            logHandler = MultiProcessingLog('logs/log.txt', when='midnight')
            logFormatter = logging.Formatter("%(asctime)s:%(levelname)s:%(module)s,%(lineno)d:%(message)s")
            logHandler.setFormatter(logFormatter)
            logging.getLogger().addHandler(logHandler)
            # debug wins over quiet when both are set
            loglevel = logging.INFO
            if quiet:
                loglevel = logging.WARNING
            if debug:
                loglevel = logging.DEBUG
            logging.getLogger().setLevel(loglevel)
            logger = setProcessLogger("master")
            logger.debug("PID %s.", os.getpid())
            logger.debug("Logging level initialized to %s.", loglevel)
            return logger

        self.cache_area = self.config.cache_area
        self.logger = setRootLogger(quiet, True)

        try:
            self.oracleDB = HTTPRequests(self.config.oracleDB,
                                         self.config.opsProxy,
                                         self.config.opsProxy)
            self.logger.debug('Contacting OracleDB:' + self.config.oracleDB)
        except:
            # log and propagate: the worker cannot do anything without the DB
            self.logger.exception('Failed when contacting Oracle')
            raise

        try:
            self.connection = RequestHandler(config={'timeout': 900, 'connecttimeout' : 900})
        except Exception as ex:
            # Best effort: construction continues with self.connection left
            # as None, but the failure is reported at error level.
            msg = "Error initializing the connection"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)


    def active_tasks(self, db):

        fileDoc = {}
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquirePublication'

        self.logger.debug("Retrieving publications from oracleDB")

        results = ''
        try:
            results = db.post(self.config.oracleFileTrans,
                              data=encodeRequest(fileDoc))
        except Exception as ex:
            self.logger.error("Failed to acquire publications \
                                from oracleDB: %s" %ex)
            return []

        fileDoc = dict()
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquiredPublication'
        fileDoc['grouping'] = 0
        fileDoc['limit'] = 100000

        self.logger.debug("Retrieving max.100000 acquired puclications from oracleDB")

        result = []

        try:
            results = db.get(self.config.oracleFileTrans,
                             data=encodeRequest(fileDoc))
            result.extend(oracleOutputMapping(results))
        except Exception as ex:
            self.logger.error("Failed to acquire publications \
                                from oracleDB: %s" %ex)
            return []

        self.logger.debug("publen: %s" % len(result))

        self.logger.debug("%s acquired puclications retrieved" % len(result))
        #TODO: join query for publisher (same of submitter)
        unique_tasks = [list(i) for i in set(tuple([x['username'],
                                                    x['user_group'],
                                                    x['user_role'],
                                                    x['taskname']]
                                                  ) for x in result if x['transfer_state'] == 3)]

        info = []
        for task in unique_tasks:
            info.append([x for x in result if x['taskname'] == task[3]])
        return zip(unique_tasks, info)

    def getPublDescFiles(self, workflow, lfn_ready):
        """
        Download and read the files describing
        what needs to be published
        """
        data = {}
        data['taskname'] = workflow
        data['filetype'] = 'EDM'

        out = []
        # divide lfn per chunks, avoiding URI-too long exception
        def chunks(l, n):
            """
            Yield successive n-sized chunks from l.
            :param l: list to splitt in chunks
            :param n: chunk size
            :return: yield the next list chunk
            """
            for i in range(0, len(l), n):
                yield l[i:i + n]

        for  lfn_ in chunks(lfn_ready, 50):
            data['lfn'] = lfn_

            try:
                res = self.oracleDB.get('/crabserver/dev/filemetadata',
                                        data=encodeRequest(data, listParams=["lfn"]))
                res = res[0]
            except Exception as ex:
                self.logger.error("Error during metadata retrieving: %s" %ex)

            print(len(res['result']))
            for obj in res['result']:
                if isinstance(obj, dict):
                    out.append(obj)
                else:
                    #print type(obj)
                    out.append(json.loads(str(obj)))

        return out

    def algorithm(self):
        """
        1. Get a list of users with files to publish from the couchdb instance
        2. For each user get a suitably sized input for publish
        3. Submit the publish to a subprocess
        """
        tasks = self.active_tasks(self.oracleDB)

        self.logger.debug('kicking off pool %s' % [x[0][3] for x in tasks])
        processes = []

        try:
            for task in tasks:
                p = Process(target=self.startSlave, args=(task,))
                p.start()
                processes.append(p)

            for proc in processes:
                proc.join()
        except:
            self.logger.exception("Error during process mapping")


    def startSlave(self, task):
        """
        Handle one task (run by algorithm() as the target of a Process):
        decide whether its publication has to be forced and, if so, dump the
        metadata of the ready files to /tmp/<workflow>.json and run
        publisher.sh for the workflow.

        :arg task: pair as built by active_tasks(): task[0] is
                   [username, user_group, user_role, taskname] and task[1] is
                   the list of file documents of that task
        :return: 1 when the user DN cannot be retrieved, 0 otherwise
        """
        # TODO: lock task!
        # - process logger
        logger = setProcessLogger(str(task[0][3]))
        logger.info("Process %s is starting. PID %s", task[0][3], os.getpid())

        self.force_publication = False
        workflow = str(task[0][3])
        # prefix put in front of every log message of this task
        wfnamemsg = "%s: " % (workflow)

        if len(task[1]) > self.max_files_per_block:
            self.force_publication = True
            msg = "All datasets have more than %s ready files." % (self.max_files_per_block)
            msg += " No need to retrieve task status nor last publication time."
            logger.info(wfnamemsg+msg)
        else:
            msg = "At least one dataset has less than %s ready files." % (self.max_files_per_block)
            logger.info(wfnamemsg+msg)
            # Retrieve the workflow status. If the status can not be retrieved, continue
            # with the next workflow.
            workflow_status = ''
            # the status URL is the cache area URL with its last path component
            # replaced by 'workflow'
            url = '/'.join(self.cache_area.split('/')[:-1]) + '/workflow'
            msg = "Retrieving status from %s" % (url)
            logger.info(wfnamemsg+msg)
            # NOTE(review): buf is only created here and closed below; nothing
            # is ever written to or read from it.
            buf = cStringIO.StringIO()
            header = {"Content-Type":"application/json"}
            data = {'workflow': workflow}#, 'subresource': 'taskads'}
            try:
                _, res_ = self.connection.request(url,
                                                  data,
                                                  header,
                                                  doseq=True,
                                                  ckey=self.userProxy,
                                                  cert=self.userProxy
                                                 )# , verbose=True) #  for debug
            except Exception as ex:
                # NOTE(review): when config.isOracle is false the failure is
                # ignored without logging and res_ stays unbound; json.loads(res_)
                # below then raises and ends up in the generic handler, leaving
                # workflow_status as ''.
                if self.config.isOracle:
                    logger.exception('Error retrieving status from cache.')
                    return 0

            msg = "Status retrieved from cache. Loading task status."
            logger.info(wfnamemsg+msg)
            try:
                buf.close()
                res = json.loads(res_)
                workflow_status = res['result'][0]['status']
                msg = "Task status is %s." % workflow_status
                logger.info(wfnamemsg+msg)
            except ValueError:
                # the reply is not valid JSON: the task is treated as removed
                msg = "Workflow removed from WM."
                logger.error(wfnamemsg+msg)
                workflow_status = 'REMOVED'
            except Exception as ex:
                msg = "Error loading task status!"
                msg += str(ex)
                msg += str(traceback.format_exc())
                logger.error(wfnamemsg+msg)
            # If the workflow status is terminal, go ahead and publish all the ready files
            # in the workflow.
            if workflow_status in ['COMPLETED', 'FAILED', 'KILLED', 'REMOVED']:
                self.force_publication = True
                if workflow_status in ['KILLED', 'REMOVED']:
                    # NOTE(review): force_failure is set here but not read
                    # anywhere in this method.
                    self.force_failure = True
                msg = "Considering task status as terminal. Will force publication."
                logger.info(wfnamemsg+msg)
            # Otherwise...
            else:
                msg = "Task status is not considered terminal."
                logger.info(wfnamemsg+msg)
                msg = "Getting last publication time."
                logger.info(wfnamemsg+msg)
                # Get when was the last time a publication was done for this workflow (this
                # should be more or less independent of the output dataset in case there are
                # more than one).
                last_publication_time = None
                data = {}
                data['workflow'] = workflow
                data['subresource'] = 'search'
                try:
                    result = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers', 'task'),
                                               data=encodeRequest(data))
                    logger.debug("task: %s " %  str(result[0]))
                    logger.debug("task: %s " %  getColumn(result[0], 'tm_last_publication'))
                except Exception as ex:
                    logger.error("Error during task doc retrieving: %s" %ex)
                # NOTE(review): last_publication_time is still None at this point
                # (nothing above assigns it from the reply), so this branch never
                # runs and the "no previous publication" path below is always
                # taken. Presumably the value was meant to come from `result` -
                # confirm the intended source before changing.
                if last_publication_time:
                    date = oracleOutputMapping(result)['last_publication']
                    seconds = datetime.strptime(date, "%Y-%m-%d %H:%M:%S.%f").timetuple()
                    last_publication_time = time.mktime(seconds)

                msg = "Last publication time: %s." % str(last_publication_time)
                logger.debug(wfnamemsg+msg)
                # If this is the first time a publication would be done for this workflow, go
                # ahead and publish.
                if not last_publication_time:
                    self.force_publication = True
                    msg = "There was no previous publication. Will force publication."
                    logger.info(wfnamemsg+msg)
                # Otherwise...
                else:
                    last = last_publication_time
                    msg = "Last published block: %s" % (last)
                    logger.debug(wfnamemsg+msg)
                    # If the last publication was long time ago (> our block publication timeout),
                    # go ahead and publish.
                    now = int(time.time()) - time.timezone
                    time_since_last_publication = now - last
                    # split elapsed time and timeout into hours and minutes, for the log message
                    hours = int(time_since_last_publication/60/60)
                    minutes = int((time_since_last_publication - hours*60*60)/60)
                    timeout_hours = int(self.block_publication_timeout/60/60)
                    timeout_minutes = int((self.block_publication_timeout - timeout_hours*60*60)/60)
                    msg = "Last publication was %sh:%sm ago" % (hours, minutes)
                    if time_since_last_publication > self.block_publication_timeout:
                        self.force_publication = True
                        msg += " (more than the timeout of %sh:%sm)." % (timeout_hours, timeout_minutes)
                        msg += " Will force publication."
                    else:
                        msg += " (less than the timeout of %sh:%sm)." % (timeout_hours, timeout_minutes)
                        msg += " Not enough to force publication."
                    logger.info(wfnamemsg+msg)

        #logger.info(task[1])
        try:
            if self.force_publication:
                # - get info
                # Keep only the documents with transfer_state 3 whose
                # publication_state is not 2, 3 or 5, reshaped into key/value records.
                active_ = [{'key': [x['username'],
                                    x['user_group'],
                                    x['user_role'],
                                    x['taskname']],
                            'value': [x['destination'],
                                      x['source_lfn'],
                                      x['destination_lfn'],
                                      x['input_dataset'],
                                      x['dbs_url'],
                                      x['last_update']
                                     ]}
                           for x in task[1] if x['transfer_state'] == 3 and x['publication_state'] not in [2, 3, 5]]

                lfn_ready = []
                # NOTE(review): wf_jobs_endtime is filled below but not used
                # afterwards in this method.
                wf_jobs_endtime = []
                pnn, input_dataset, input_dbs_url = "", "", ""
                for active_file in active_:
                    # value[5] is the document's 'last_update' field
                    job_end_time = active_file['value'][5]
                    if job_end_time and self.config.isOracle:
                        wf_jobs_endtime.append(int(job_end_time) - time.timezone)
                    elif job_end_time:
                        wf_jobs_endtime.append(int(time.mktime(time.strptime(str(job_end_time), '%Y-%m-%d %H:%M:%S'))) - time.timezone)
                    source_lfn = active_file['value'][1]
                    dest_lfn = active_file['value'][2]
                    # remember which source LFN each destination LFN comes from
                    self.lfn_map[dest_lfn] = source_lfn
                    # take destination, input dataset and DBS URL from the files
                    # until all three are non-empty
                    if not pnn or not input_dataset or not input_dbs_url:
                        pnn = str(active_file['value'][0])
                        input_dataset = str(active_file['value'][3])
                        input_dbs_url = str(active_file['value'][4])
                    lfn_ready.append(dest_lfn)

                userDN = ''
                username = task[0][0]
                # group and role default to the empty string when not set
                user_group = ""
                if task[0][1]:
                    user_group = task[0][1]
                user_role = ""
                if task[0][2]:
                    user_role = task[0][2]
                logger.debug("Trying to get DN %s %s %s" % (username, user_group, user_role))

                try:
                    userDN = getDNFromUserName(username, logger)
                except Exception as ex:
                    msg = "Error retrieving the user DN"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logger.error(msg)
                    return 1

                # Get metadata
                toPublish = []
                publDescFiles_list = self.getPublDescFiles(workflow, lfn_ready)
                # attach user and transfer information to each metadata document
                # whose lfn matches the destination LFN of an active file
                for file_ in active_:
                    for _, doc in enumerate(publDescFiles_list):
                        #logger.info(type(doc))
                        #logger.info(doc)
                        if doc["lfn"] == file_["value"][2]:
                            doc["User"] = username
                            doc["Group"] = file_["key"][1]
                            doc["Role"] = file_["key"][2]
                            doc["UserDN"] = userDN
                            doc["Destination"] = file_["value"][0]
                            doc["SourceLFN"] = file_["value"][1]
                            toPublish.append(doc)
                # the documents are handed over through a JSON file named after the workflow
                with open("/tmp/"+workflow+'.json', 'w') as outfile:
                    json.dump(toPublish, outfile)
                logger.info(". publisher.sh %s" % (workflow))
                # NOTE(review): the script path is hard-coded and its exit
                # status is ignored.
                subprocess.call(["/bin/bash", "/data/user/MicroASO/microPublisher/python/publisher.sh", workflow])

        except:
            # any failure in the publication step is only logged; 0 is still returned
            logger.exception("Exception!")


        return 0