def parse_pfns(self, pfns):
    """
    Splits the given PFN(s) into the parts known by the protocol. During parsing
    each PFN is also checked against the hostname, port and prefix found in
    self.attributes.

    :param pfns: a fully qualified PFN, or a list of fully qualified PFNs

    :returns: a dict with the PFN as key and, as value, a dict with the keys
              scheme, port, hostname, path, name, prefix and web_service_path.
              'path' is None when self.rse['staging_area'] is set; otherwise it
              starts with '/' and ends with '/'.

    :raises RSEFileNameNotSupported: if the provided PFN doesn't match with the protocol settings
    """
    ret = dict()
    pfns = [pfns] if isinstance(pfns, string_types) else pfns
    service_markers = ('/srm/managerv2', '/srm/managerv1', '/srm/v2/server')

    for pfn in pfns:
        parsed = urlparse.urlparse(pfn)
        if parsed.path.startswith(service_markers):
            # The URL path is a web service endpoint: take the whole PFN apart with a regex so that
            # the optional part ending in '=' becomes the web service path and the rest the path.
            scheme, hostname, port, service_path, path = re.findall(r"([^:]+)://([^:/]+):?(\d+)?([^:]+=)?([^:]+)", pfn)[0]
        else:
            scheme = parsed.scheme
            hostname, _sep, port = parsed.netloc.partition(':')
            path = parsed.path
            service_path = ''

        # force type conversion; a missing or non-numeric port counts as "not provided"
        try:
            port = int(port)
        except (TypeError, ValueError):
            port = ''

        expected_hostname = self.attributes['hostname']
        if expected_hostname != hostname and expected_hostname != scheme + "://" + hostname:
            raise exception.RSEFileNameNotSupported('Invalid hostname: provided \'%s\', expected \'%s\'' % (hostname, expected_hostname))

        if port == '':
            # No port in the PFN: fall back to the configured one
            port = self.attributes['port']
        elif str(self.attributes['port']) != str(port):
            raise exception.RSEFileNameNotSupported('Invalid port: provided \'%s\', expected \'%s\'' % (port, self.attributes['port']))

        prefix = self.attributes['prefix']
        if not path.startswith(prefix):
            raise exception.RSEFileNameNotSupported('Invalid prefix: provided \'%s\', expected \'%s\'' % ('/'.join(path.split('/')[0:len(prefix.split('/')) - 1]),
                                                                                                           prefix))  # len(...)-1 due to the leading '/

        # Spliting path into prefix, path, filename
        remainder = path.partition(prefix)[2]
        name = remainder.split('/')[-1]
        if self.rse['staging_area']:
            # No directory part is reported for staging areas
            path = None
        else:
            path = '/' + '/'.join(remainder.split('/')[:-1])
            # Make sure the directory part ends with exactly the trailing '/' it needs
            if path != '/' and not path.endswith('/'):
                path += '/'

        ret[pfn] = {'scheme': scheme, 'port': port, 'hostname': hostname, 'path': path, 'name': name, 'prefix': prefix, 'web_service_path': service_path}

    return ret
def parse_pfns(self, pfns):
    """
    Splits the given PFN(s) into the parts known by the protocol. It is also
    checked if the provided protocol supports the given PFNs.

    Side effect: self.attributes['prefix'] is rewritten so that it starts and
    ends with '/' (a prefix of None is treated as an empty prefix).

    :param pfns: a fully qualified PFN, or a list of fully qualified PFNs

    :returns: dict with PFN as key and a dict with path, name, scheme, prefix, port and hostname as value.
              'port' is 0 when the PFN carries no port.

    :raises RSEFileNameNotSupported: if the provided PFN doesn't match with the protocol settings
    :raises ValueError: if the port part of the PFN is not numeric
    """
    ret = dict()
    pfns = [pfns] if isinstance(pfns, string_types) else pfns
    logging.debug('... Beginning GlobusRSEProtocol.parse_pfns ... ')

    # Protect against 'lazy' defined prefixes for RSEs in the repository.
    # The prefix does not depend on the PFN, so it is normalised once per call.
    prefix = self.attributes['prefix']
    if prefix is None:
        prefix = ''
    if not prefix.startswith('/'):
        prefix = '/' + prefix
    if not prefix.endswith('/'):
        prefix += '/'
    self.attributes['prefix'] = prefix

    for pfn in pfns:
        parsed = urlparse(pfn)
        scheme = parsed.scheme
        hostname, _sep, port_text = parsed.netloc.partition(':')
        port = int(port_text) if port_text != '' else 0

        # Collapse any run of slashes in the path into a single '/'
        path = parsed.path
        while '//' in path:
            path = path.replace('//', '/')

        if self.attributes['hostname'] != hostname:
            if self.attributes['hostname'] != 'localhost':  # In the database empty hostnames are replaced with localhost but for some URIs (e.g. file) a hostname is not included
                raise exception.RSEFileNameNotSupported('Invalid hostname: provided \'%s\', expected \'%s\'' % (hostname, self.attributes['hostname']))

        if self.attributes['port'] != port:
            raise exception.RSEFileNameNotSupported('Invalid port: provided \'%s\', expected \'%s\'' % (port, self.attributes['port']))

        if not path.startswith(prefix):
            raise exception.RSEFileNameNotSupported('Invalid prefix: provided \'%s\', expected \'%s\'' % ('/'.join(path.split('/')[0:len(prefix.split('/')) - 1]),
                                                                                                           prefix))  # len(...)-1 due to the leading '/

        # Spliting parsed.path into prefix, path, filename
        remainder = path.partition(prefix)[2]
        path, _sep, name = remainder.rpartition('/')
        if not path.startswith('/'):
            path = '/' + path
        if path != '/' and not path.endswith('/'):
            path = path + '/'

        ret[pfn] = {'path': path, 'name': name, 'scheme': scheme, 'prefix': prefix, 'port': port, 'hostname': hostname, }

    return ret
def split_pfn(self, pfn):
    """
    Splits the given PFN into the parts known by the protocol. During parsing
    the PFN is also checked against the hostname and prefix found in self.rse.

    Side effect: self.rse['prefix'] is rewritten so that it starts and ends
    with '/' (a prefix of None is treated as an empty prefix).

    :param pfn: a fully qualified PFN

    :returns: a dict with the keys scheme, hostname, port, prefix, path and name.
              'port' is 0 when the PFN carries no port; 'path' is the directory
              part below the prefix (including its trailing '/', empty if the
              file sits directly below the prefix); 'name' is the text after
              the last '/'.

    :raises RSEFileNameNotSupported: if the provided PFN doesn't match with the protocol settings
    :raises ValueError: if the port part of the PFN is not numeric
    """
    parsed = urlparse(pfn)
    hostname, _sep, port_text = parsed.netloc.partition(':')
    port = int(port_text) if port_text != '' else 0

    # Protect against 'lazy' defined prefixes for RSEs in the repository
    prefix = self.rse['prefix']
    if prefix is None:
        prefix = ''
    if not prefix.startswith('/'):
        prefix = '/' + prefix
    if not prefix.endswith('/'):
        prefix += '/'
    self.rse['prefix'] = prefix

    if self.rse['hostname'] != hostname:
        raise exception.RSEFileNameNotSupported('Invalid hostname: provided \'%s\', expected \'%s\'' % (hostname, self.rse['hostname']))

    if not parsed.path.startswith(prefix):
        raise exception.RSEFileNameNotSupported('Invalid prefix: provided \'%s\', expected \'%s\'' % ('/'.join(parsed.path.split('/')[0:len(prefix.split('/')) - 1]),
                                                                                                       prefix))  # len(...)-1 due to the leading '/

    # Spliting parsed.path into prefix, path, filename
    remainder = parsed.path[len(prefix):]
    # Split on the LAST '/': cutting at the first occurrence of the file name
    # would truncate the path whenever a directory contains the same text,
    # and would fail outright for an empty file name.
    directory, slash, name = remainder.rpartition('/')

    return {'scheme': parsed.scheme,
            'hostname': hostname,
            'port': port,
            'path': directory + slash,
            'prefix': prefix,
            'name': name}