def _get_size(self):
    """
    Return the size of this data resource as reported by the size service

    The size is requested from the '/data/size/eml/' counterpart of this
    resource's URL and the response body is parsed as an integer.

    :return: Size as an int, or None if the request wrapper returned None
    """
    size_url = self._url.replace('/data/eml/', '/data/size/eml/')
    response = adapter_utilities.requests_get_url_wrapper(url=size_url)
    if response is None:
        return None
    return int(response.text.strip())
def _get_acl(self, path, replacement):
    """
    Return the access control list of principals and permissions

    The ACL document is requested (with GMN credentials) from this
    resource's URL after substituting `replacement` for `path`, and every
    'allow' rule in it is converted to a dict.

    :param path: PASTA resource path fragment
    :param replacement: Modified path fragment for PASTA EML ACL
    :return: List of {'principal': ..., 'permission': ...} dicts; when this
             resource has an owner, a 'changePermission' entry for that
             owner is appended last
    """
    credentials = (properties.GMN_USER, properties.GMN_PASSWD)
    acl_url = self._url.replace(path, replacement)
    response = adapter_utilities.requests_get_url_wrapper(
        url=acl_url, auth=credentials)

    rules = []
    if response is not None:
        root = ET.fromstring(response.text.strip())
        rules = [
            {
                'principal': allow.find('./principal').text,
                'permission': allow.find('./permission').text
            }
            for allow in root.iter('allow')
        ]

    # The owner always receives full control, independent of the fetched ACL
    if self._owner is not None:
        rules.append({
            'principal': self._owner,
            'permission': 'changePermission'
        })
    return rules
def _get_format_id(self):
    """
    Return the DataONE format identifier for this metadata resource

    The format string is requested from the '/metadata/format/eml/'
    counterpart of this resource's URL and looked up in the formats
    returned by adapter_utilities.get_d1_formats().

    :return: The matching entry's formatId, or None if the request wrapper
             returned None or the reported format is not a known key
    """
    known_formats = adapter_utilities.get_d1_formats()
    format_url = self._url.replace('/metadata/eml/', '/metadata/format/eml/')
    response = adapter_utilities.requests_get_url_wrapper(url=format_url)
    if response is None:
        return None

    version = response.text.strip()
    if version not in known_formats:
        return None
    return known_formats[version].formatId
def _get_file_name(self):
    """
    Return the file name recorded in this data resource's metadata

    The resource metadata XML is requested from the '/data/rmd/eml'
    counterpart of this resource's URL and the text of its first
    'fileName' element is returned.

    :return: File name as a String, or None if the request wrapper
             returned None, the response body is empty, or the document
             has no 'fileName' element
    """
    file_name = None
    url = self._url.replace('/data/eml', '/data/rmd/eml')
    r = adapter_utilities.requests_get_url_wrapper(url=url)
    if r is not None:
        rmd = r.text.strip()
        # strip() never yields None, so test for an empty body instead;
        # feeding '' to the XML parser would raise a ParseError
        if rmd:
            element = ET.fromstring(rmd).find('.//fileName')
            # find() returns None when the element is absent
            if element is not None:
                file_name = element.text
    return file_name
def _get_checksum_value(self, path, replacement):
    """
    Fetch the checksum value for the given resource

    :param path: PASTA resource path fragment
    :param replacement: Modified path fragment for checksum value
    :return: The stripped response text, or None if the request wrapper
             returned None
    """
    checksum_url = self._url.replace(path, replacement)
    response = adapter_utilities.requests_get_url_wrapper(url=checksum_url)
    return None if response is None else response.text.strip()
def parse(url=None, fromDate=None, toDate=None, scope=properties.SCOPE):
    """
    Parse the PASTA list of changes XML based on the query parameters
    provided and enqueue an event for every whitelisted data package

    :param url: changes URL as a String; the query parameters are appended
                directly to it, so it is required despite the None default
    :param fromDate: fromDate as a date formatted String
                     '%Y-%m-%dT%H:%M:%S.%f', or None for no lower bound
    :param toDate: toDate as a date formatted String
                   '%Y-%m-%dT%H:%M:%S.%f', or None for no upper bound
    :param scope: scope filter value (only one) as a String for changes
                  query, or None for no scope filter
    :return: None
    """
    query = []
    if fromDate is not None:
        query.append('fromDate=' + fromDate)
    if toDate is not None:
        query.append('toDate=' + toDate)
    if scope is not None:
        query.append('scope=' + scope)
    url = url + '&'.join(query)

    r = adapter_utilities.requests_get_url_wrapper(url=url)
    if r is None:
        return

    # Record dates are compared against fromDate with its trailing zeros
    # removed; without a fromDate there is nothing to skip
    skip_date = None if fromDate is None else fromDate.rstrip('0')

    qm = QueueManager()
    root = ET.fromstring(r.text.strip())
    for dataPackage in root.iter('dataPackage'):
        package = dataPackage.find('./packageId')
        date = dataPackage.find('./date')
        method = dataPackage.find('./serviceMethod')
        owner = dataPackage.find('./principal')
        doi = dataPackage.find('./doi')

        event = Event()
        event.package = package.text
        event.datetime = date.text
        event.method = method.text
        event.owner = owner.text
        event.doi = doi.text

        # Skip fromDate record(s) that already exist in queue
        if skip_date is not None and skip_date == date.text:
            msg = 'Skipping: {} - {} - {}'.format(package.text, date.text,
                                                  method.text)
            logger.warning(msg)
        else:
            # Provide additional filter for multiple scope values
            package_scope = event.package.split('.')[0]
            if package_scope in properties.PASTA_WHITELIST:
                msg = 'Enqueue: {} - {} - {}'.format(
                    package.text, date.text, method.text)
                logger.warning(msg)
                qm.enqueue(event=event)
            else:
                logger.info('Package {} out of scope'.format(package.text))
def _assert_resource_is_public(resource_url):
    """
    Report whether the given PASTA resource is publicly accessible

    The resource URL is appended to the 'authz?resourceId=' query on the
    PASTA base URL and the request is issued without credentials.

    :param resource_url: The resource URL
    :return: True if the request wrapper returned a response, False if it
             returned None
    """
    authz_url = properties.PASTA_BASE_URL + 'authz?resourceId=' + resource_url
    response = adapter_utilities.requests_get_url_wrapper(url=authz_url)
    return response is not None
def _get_replication_policy(eml_url=None):
    """
    Return the replication policy embedded in an EML document, if any

    The document is searched for 'd1v1:replicationPolicy' elements under
    'additionalMetadata/metadata' relative to the root element.

    :param eml_url: URL of the EML document
    :return: The first matching element serialized to a String, or None if
             the request wrapper returned None or no element matched
    """
    response = adapter_utilities.requests_get_url_wrapper(url=eml_url)
    if response is None:
        return None

    namespaces = {
        'eml': 'eml://ecoinformatics.org/eml-2.1.1',
        'd1v1': 'http://ns.dataone.org/service/types/v1'
    }
    eml_root = ET.fromstring(response.text)
    matches = eml_root.findall(
        "additionalMetadata/metadata/d1v1:replicationPolicy", namespaces)
    if not matches:
        return None
    return ET.tostring(matches[0]).decode('utf-8')
def _build_resource_list(eml_url, package_map_url, principal_owner, doi,
                         package_id):
    """
    Return a dict of data package resources without the reflexive package
    resource.

    Each whitespace-separated URL in the package map response is matched
    against the metadata, report and data patterns in turn; URLs matching
    none of them are ignored. The ORE resource is built last from the
    collected resources and shares the metadata resource's ACL.

    NOTE: the package map response is used without a None check, so a
    failed request surfaces as an AttributeError here.

    :param eml_url: URL of the EML document, used to look up the
                    replication policy applied to every resource
    :param package_map_url: PASTA package resource map url
    :param principal_owner: PASTA package principal owner
    :param doi: Data package DOI passed to the ORE resource
    :param package_id: Data package identifier passed to the metadata,
                       report and ORE resources
    :return: Dict of resources keyed by properties.METADATA, REPORT, ORE
             and DATA (the latter holding a list)
    """
    resources = {
        properties.METADATA: '',
        properties.REPORT: '',
        properties.ORE: '',
        properties.DATA: []
    }
    metadata_acl = None

    policy = _get_replication_policy(eml_url)
    if policy is not None:
        policy = _generate_replication_policy(policy)

    response = adapter_utilities.requests_get_url_wrapper(url=package_map_url)
    for entry_url in response.text.split():
        if properties.METADATA_PATTERN in entry_url:
            metadata = ResourceMetadata(entry_url, principal_owner,
                                        package_id)
            metadata.replication_policy = policy
            resources[properties.METADATA] = metadata
            metadata_acl = metadata.acl
        elif properties.REPORT_PATTERN in entry_url:
            report = ResourceReport(entry_url, principal_owner, package_id)
            report.replication_policy = policy
            resources[properties.REPORT] = report
        elif properties.DATA_PATTERN in entry_url:
            data = ResourceData(entry_url, principal_owner)
            data.replication_policy = policy
            resources[properties.DATA].append(data)

    ore = ResourceOre(doi, principal_owner, resources, package_id)
    ore.acl = metadata_acl  # ORE shares the metadata/package ACL
    ore.replication_policy = policy
    resources[properties.ORE] = ore

    return resources
def _get_size(self):
    """
    Return the size of this resource taken from the response headers

    The resource itself is requested and its 'Content-Length' header is
    parsed as an integer.

    :return: Size as an int, or None if the request wrapper returned None
    """
    response = adapter_utilities.requests_get_url_wrapper(url=self._url)
    if response is None:
        return None
    return int(response.headers['Content-Length'])
def parse(url=None, fromDate=None, toDate=None, scope=None):
    """
    Parse the PASTA list of changes XML based on the query parameters
    provided and enqueue an event for every whitelisted data package

    :param url: changes URL as a String; the query parameters are appended
                directly to it, so it is required despite the None default
    :param fromDate: fromDate as a datetime, or None for no lower bound
    :param toDate: toDate as a datetime, or None for no upper bound
    :param scope: scope filter value (only one) as a String for changes
                  query, or None for no scope filter
    :return: None
    """
    msg = (f'parse params: url-{url}, fromDate-{fromDate}, toDate-{toDate},'
           f' scope-{scope}')
    logger.info(msg)

    # convert to string representations (only the dates actually supplied)
    date_format = '%Y-%m-%dT%H:%M:%S.%f'
    if fromDate is not None:
        fromDate = datetime.strftime(fromDate, date_format)
    if toDate is not None:
        toDate = datetime.strftime(toDate, date_format)

    # add the supplied filters to the url, '&'-separated regardless of
    # which of them are present
    query = []
    if fromDate is not None:
        query.append('fromDate=' + fromDate)
    if toDate is not None:
        query.append('toDate=' + toDate)
    if scope is not None:
        query.append('scope=' + scope)
    url = url + '&'.join(query)

    logger.info('requests_get_url_wrapper: ' + url)
    # NOTE(review): rethrow=True presumably lets request errors propagate
    # to the caller - confirm against adapter_utilities
    r = adapter_utilities.requests_get_url_wrapper(url=url, rethrow=True)
    if r is None:
        return

    # Record dates are compared against fromDate with its trailing zeros
    # removed; without a fromDate there is nothing to skip
    skip_date = None if fromDate is None else fromDate.rstrip('0')

    qm = QueueManager()
    root = ET.fromstring(r.text.strip())
    for dataPackage in root.iter('dataPackage'):
        package = dataPackage.find('./packageId')
        date = dataPackage.find('./date')
        method = dataPackage.find('./serviceMethod')
        owner = dataPackage.find('./principal')
        doi = dataPackage.find('./doi')

        event = Event()
        event.package = package.text
        event.datetime = date.text
        event.method = method.text
        event.owner = owner.text
        event.doi = doi.text

        # Skip fromDate record(s) that already exist in queue
        if skip_date is not None and skip_date == date.text:
            msg = 'Skipping: {} - {} - {}'.format(package.text, date.text,
                                                  method.text)
            logger.warning(msg)
        else:
            # Provide additional filter for multiple scope values
            package_scope = event.package.split('.')[0]
            if package_scope in properties.PASTA_WHITELIST:
                msg = 'Enqueue: {} - {} - {}'.format(
                    package.text, date.text, method.text)
                logger.warning(msg)
                qm.enqueue(event=event)
            else:
                logger.info('Package {} out of scope'.format(package.text))