Esempio n. 1
0
 def _get_size(self):
     """
     Return the size reported for this data resource

     The size URL is derived from this resource's URL by replacing the
     '/data/eml/' path fragment with '/data/size/eml/'. The response body
     is stripped of surrounding whitespace and converted to an int.

     :return: Size as an int, or None when the request wrapper returns None
     """
     size_url = self._url.replace('/data/eml/', '/data/size/eml/')
     response = adapter_utilities.requests_get_url_wrapper(url=size_url)
     if response is None:
         return None
     return int(response.text.strip())
Esempio n. 2
0
    def _get_acl(self, path, replacement):
        """
        Build the access control list of principals and permissions

        The ACL URL is obtained by substituting ``replacement`` for ``path``
        in this resource's URL and is requested with the GMN credentials
        from ``properties``. Every 'allow' rule in the returned XML
        contributes one entry; when ``self._owner`` is set, a final
        'changePermission' entry for the owner is appended.

        :param path: PASTA resource path fragment
        :param replacement: Modified path fragment for PASTA EML ACL
        :return: List of dicts with 'principal' and 'permission' keys
        """
        credentials = (properties.GMN_USER, properties.GMN_PASSWD)
        acl_url = self._url.replace(path, replacement)
        response = adapter_utilities.requests_get_url_wrapper(
            url=acl_url, auth=credentials)

        entries = []
        if response is not None:
            root = ET.fromstring(response.text.strip())
            entries = [
                {
                    'principal': rule.find('./principal').text,
                    'permission': rule.find('./permission').text
                }
                for rule in root.iter('allow')
            ]

        # The owner entry is added even when no ACL document was retrieved.
        if self._owner is not None:
            entries.append({
                'principal': self._owner,
                'permission': 'changePermission'
            })
        return entries
Esempio n. 3
0
 def _get_format_id(self):
     """
     Return the DataONE format identifier matching this EML document

     The format URL is derived from this resource's URL by replacing the
     '/metadata/eml/' path fragment with '/metadata/format/eml/'. The
     stripped response body is used as a key into the mapping returned by
     adapter_utilities.get_d1_formats().

     :return: The ``formatId`` of the matching entry, or None when no
              response is obtained or the returned value is not a known key
     """
     known_formats = adapter_utilities.get_d1_formats()
     format_url = self._url.replace('/metadata/eml/',
                                    '/metadata/format/eml/')
     response = adapter_utilities.requests_get_url_wrapper(url=format_url)
     if response is None:
         return None
     version = response.text.strip()
     if version not in known_formats:
         return None
     return known_formats[version].formatId
Esempio n. 4
0
 def _get_file_name(self):
     """
     Return the file name recorded in this data resource's metadata

     The resource metadata URL is derived from this resource's URL by
     replacing the '/data/eml' path fragment with '/data/rmd/eml'. The
     response body is parsed as XML and the text of the first 'fileName'
     element found is returned.

     :return: File name as a String, or None when no response is obtained
              or the document contains no 'fileName' element
     """
     url = self._url.replace('/data/eml', '/data/rmd/eml')
     r = adapter_utilities.requests_get_url_wrapper(url=url)
     if r is None:
         # Without this guard the metadata text was never bound and the
         # method raised UnboundLocalError instead of returning None.
         return None
     root = ET.fromstring(r.text.strip())
     file_name_element = root.find('.//fileName')
     if file_name_element is None:
         return None
     return file_name_element.text
Esempio n. 5
0
    def _get_checksum_value(self, path, replacement):
        """
        Fetch the checksum value for the given resource

        The checksum URL is obtained by substituting ``replacement`` for
        ``path`` in this resource's URL.

        :param path: PASTA resource path fragment
        :param replacement: Modified path fragment for checksum value
        :return: Response body stripped of surrounding whitespace, or None
                 when the request wrapper returns None
        """
        checksum_url = self._url.replace(path, replacement)
        response = adapter_utilities.requests_get_url_wrapper(
            url=checksum_url)
        return None if response is None else response.text.strip()
Esempio n. 6
0
def parse(url=None, fromDate=None, toDate=None, scope=properties.SCOPE):
    """
    Parse the PASTA list of changes XML based on the query parameters provided

    Each 'dataPackage' element of the response is converted to an Event. An
    event whose date equals fromDate (with trailing zeros removed) is
    skipped; any other event is enqueued when the scope portion of its
    package identifier is listed in properties.PASTA_WHITELIST.

    :param url: changes URL as a String; query parameters are appended to
                it directly, without a leading separator
    :param fromDate: fromDate as a date formatted String '%Y-%m-%dT%H:%M:%S.%f'
    :param toDate: toDate as a date formatted String '%Y-%m-%dT%H:%M:%S.%f'
    :param scope: scope filter value (only one) as a String for changes
                  query
    :return: None
    """
    query = []
    if fromDate is not None:
        query.append('fromDate=' + fromDate)
    if toDate is not None:
        query.append('toDate=' + toDate)
    if scope is not None:
        query.append('scope=' + scope)
    if query:
        url = url + '&'.join(query)

    r = adapter_utilities.requests_get_url_wrapper(url=url)

    if r is not None:
        qm = QueueManager()
        # Without a fromDate there is no boundary record to skip; calling
        # rstrip on None here used to raise AttributeError.
        skip_date = None if fromDate is None else fromDate.rstrip('0')
        tree = ET.ElementTree(ET.fromstring(r.text.strip()))
        for dataPackage in tree.iter('dataPackage'):
            package = dataPackage.find('./packageId')
            date = dataPackage.find('./date')
            method = dataPackage.find('./serviceMethod')
            owner = dataPackage.find('./principal')
            doi = dataPackage.find('./doi')

            event = Event()
            event.package = package.text
            event.datetime = date.text
            event.method = method.text
            event.owner = owner.text
            event.doi = doi.text

            # Skip fromDate record(s) that already exist in queue
            if skip_date is not None and skip_date == date.text:
                msg = 'Skipping: {} - {} - {}'.format(package.text, date.text,
                                                      method.text)
                logger.warning(msg)
            else:
                # Provide additional filter for multiple scope values
                package_scope = event.package.split('.')[0]
                if package_scope in properties.PASTA_WHITELIST:
                    msg = 'Enqueue: {} - {} - {}'.format(
                        package.text, date.text, method.text)
                    logger.warning(msg)
                    qm.enqueue(event=event)
                else:
                    logger.info('Package {} out of scope'.format(package.text))
Esempio n. 7
0
def _assert_resource_is_public(resource_url):
    """
    Report whether the given PASTA resource is publicly accessible

    The resource URL is appended to the 'authz?resourceId=' query on
    properties.PASTA_BASE_URL; the resource is considered public when the
    request wrapper returns a response rather than None.

    :param resource_url: The resource URL
    :return: Boolean
    """
    authz_url = properties.PASTA_BASE_URL + 'authz?resourceId=' + resource_url
    response = adapter_utilities.requests_get_url_wrapper(url=authz_url)
    return response is not None
Esempio n. 8
0
def _get_replication_policy(eml_url=None):
    """
    Extract the DataONE replication policy embedded in an EML document

    The document at ``eml_url`` is parsed as XML and searched for
    'd1v1:replicationPolicy' elements under 'additionalMetadata/metadata'.

    :param eml_url: URL of the EML document
    :return: The first matching element serialized as a String, or None
             when no response is obtained or no such element exists
    """
    response = adapter_utilities.requests_get_url_wrapper(url=eml_url)
    if response is None:
        return None

    namespaces = {
        'eml': 'eml://ecoinformatics.org/eml-2.1.1',
        'd1v1': 'http://ns.dataone.org/service/types/v1'
    }
    root = ET.fromstring(response.text)
    policies = root.findall(
        "additionalMetadata/metadata/d1v1:replicationPolicy", namespaces)
    if not policies:
        return None
    # Only the first policy element is used; any others are ignored.
    return ET.tostring(policies[0]).decode('utf-8')
Esempio n. 9
0
def _build_resource_list(eml_url, package_map_url, principal_owner, doi,
                         package_id):
    """
    Return a dict of data package resources without the reflexive package
    resource.

    The package resource map is fetched and split on whitespace into
    resource URLs. Each URL is classified by the metadata, report and data
    patterns in ``properties`` and wrapped in the corresponding resource
    object; URLs matching none of the patterns are ignored. An ORE resource
    is then built from the collected resources and given the ACL of the
    metadata resource.

    :param eml_url: URL of the EML document inspected for a replication
                    policy
    :param package_map_url: PASTA package resource map url
    :param principal_owner: PASTA package principal owner
    :param doi: DOI passed to the ORE resource
    :param package_id: Package identifier passed to the metadata, report
                       and ORE resources
    :return: Dict of resources keyed by properties.METADATA, REPORT, ORE
             and DATA (the latter a list)
    :raises RuntimeError: if the package resource map cannot be retrieved
    """
    resources = {
        properties.METADATA: '',
        properties.REPORT: '',
        properties.ORE: '',
        properties.DATA: []
    }

    package_acl = None
    replication_policy = _get_replication_policy(eml_url)
    if replication_policy is not None:
        replication_policy = _generate_replication_policy(replication_policy)

    url = package_map_url
    r = adapter_utilities.requests_get_url_wrapper(url=url)
    if r is None:
        # Fail with a message naming the resource map instead of an opaque
        # AttributeError from dereferencing None below.
        raise RuntimeError(
            'Unable to retrieve package resource map: {}'.format(url))
    resource_urls = r.text.split()
    for resource_url in resource_urls:
        if properties.METADATA_PATTERN in resource_url:
            rm = ResourceMetadata(resource_url, principal_owner, package_id)
            rm.replication_policy = replication_policy
            resources[properties.METADATA] = rm
            package_acl = rm.acl
        elif properties.REPORT_PATTERN in resource_url:
            rr = ResourceReport(resource_url, principal_owner, package_id)
            rr.replication_policy = replication_policy
            resources[properties.REPORT] = rr
        elif properties.DATA_PATTERN in resource_url:
            rd = ResourceData(resource_url, principal_owner)
            rd.replication_policy = replication_policy
            resources[properties.DATA].append(rd)

    ro = ResourceOre(doi, principal_owner, resources, package_id)
    ro.acl = package_acl  # Assign ORE same ACL as metadata/package ACL
    ro.replication_policy = replication_policy
    resources[properties.ORE] = ro

    return resources
Esempio n. 10
0
 def _get_size(self):
     """
     Return the size of this resource taken from its response headers

     The resource URL itself is requested and the 'Content-Length' header
     of the response is converted to an int.

     :return: Size as an int, or None when the request wrapper returns None
     """
     response = adapter_utilities.requests_get_url_wrapper(url=self._url)
     if response is None:
         return None
     # NOTE(review): a response without a Content-Length header raises
     # KeyError here - confirm that is acceptable to callers.
     return int(response.headers['Content-Length'])
Esempio n. 11
0
def parse(url=None, fromDate=None, toDate=None, scope=None):
    """
    Parse the PASTA list of changes XML based on the query parameters provided

    Each 'dataPackage' element of the response is converted to an Event. An
    event whose date equals the formatted fromDate (with trailing zeros
    removed) is skipped; any other event is enqueued when the scope portion
    of its package identifier is listed in properties.PASTA_WHITELIST.

    :param url: changes URL as a String; query parameters are appended to
                it directly, without a leading separator
    :param fromDate: fromDate as a datetime
    :param toDate: toDate as a datetime
    :param scope: scope filter value (only one) as a String for changes
                     query
    :return: None
    """

    msg = f'parse params: url-{url}, fromDate-{fromDate}, toDate-{toDate},' + \
          f' scope-{scope}'
    logger.info(msg)

    # convert to string representations; either date may be omitted
    if fromDate is not None:
        fromDate = datetime.strftime(fromDate, '%Y-%m-%dT%H:%M:%S.%f')
    if toDate is not None:
        toDate = datetime.strftime(toDate, '%Y-%m-%dT%H:%M:%S.%f')

    # add date(s) and scope to url, joined so that the first parameter
    # present never starts with '&'
    query = []
    if fromDate is not None:
        query.append('fromDate=' + fromDate)
    if toDate is not None:
        query.append('toDate=' + toDate)
    if scope is not None:
        query.append('scope=' + scope)
    if query:
        url = url + '&'.join(query)

    logger.info('requests_get_url_wrapper: ' + url)
    r = adapter_utilities.requests_get_url_wrapper(url=url, rethrow=True)

    if r is not None:
        qm = QueueManager()
        # Without a fromDate there is no boundary record to skip.
        # NOTE(review): when the microseconds are all zero, rstrip('0')
        # leaves a trailing '.', so the comparison presumably relies on the
        # service formatting its dates the same way - confirm.
        skip_date = None if fromDate is None else fromDate.rstrip('0')
        tree = ET.ElementTree(ET.fromstring(r.text.strip()))
        for dataPackage in tree.iter('dataPackage'):
            package = dataPackage.find('./packageId')
            date = dataPackage.find('./date')
            method = dataPackage.find('./serviceMethod')
            owner = dataPackage.find('./principal')
            doi = dataPackage.find('./doi')

            event = Event()
            event.package = package.text
            event.datetime = date.text
            event.method = method.text
            event.owner = owner.text
            event.doi = doi.text

            # Skip fromDate record(s) that already exist in queue
            if skip_date is not None and skip_date == date.text:
                msg = 'Skipping: {} - {} - {}'.format(package.text, date.text,
                                                      method.text)
                logger.warning(msg)
            else:
                # Provide additional filter for multiple scope values
                package_scope = event.package.split('.')[0]
                if package_scope in properties.PASTA_WHITELIST:
                    msg = 'Enqueue: {} - {} - {}'.format(
                        package.text, date.text, method.text)
                    logger.warning(msg)
                    qm.enqueue(event=event)
                else:
                    logger.info('Package {} out of scope'.format(package.text))