Exemple #1
0
def validate_gml_data(root_path, pattern, schema_file, tl):
    """
    Validate every GML data file found below a root directory against an XML schema.

    The files are located with locate(pattern, root_path) and validated one after the other; processing stops at
    the first file that is reported invalid. If schema_file is None, no explicit schema is passed to the validator
    (the schemas referenced by the file's 'schemaLocation' attribute are reported as being used).

    @type       root_path: string
    @param      root_path: Root directory
    @type       pattern:  string
    @param      pattern:  pattern to search GML data files
    @type       schema_file:  string or None
    @param      schema_file:  Path to the XML schema file, or None to rely on the 'schemaLocation' attribute
    @type       tl:  workers.TaskLogger
    @param      tl:  workers.TaskLogger
    @rtype:     bool
    @return:    True if no located file failed validation (also when no file was located), False otherwise
    """
    xmlval = XmlValidation()
    for gml_file in locate(pattern, root_path):
        logger.debug("Validating GML data file: %s" %
                     strip_prefixes(gml_file, root_path))
        # validate against xml schema
        result = xmlval.validate_XML_by_path(gml_file, schema_file)
        if schema_file is None:
            tl.addinfo(
                "Using schema files specified by the 'schemaLocation' attribute"
            )
        else:
            # The format string needs a conversion specifier; without one the
            # '%' operator raises TypeError as soon as a schema file is given.
            tl.addinfo("Using schema: %s" % schema_file)

        if not result.valid:
            if schema_file is None:
                tl.adderr(
                    "Error validating against schemas using schema files specified by the 'schemaLocation' attribute:"
                )
            else:
                tl.adderr("Error validating against schema '%s': %s" %
                          (schema_file, result.err))
            # Report every individual validation error on its own line.
            for err in result.err:
                tl.adderr("- %s" % str(err))
            return False

        logger.debug("GML data file '%s' successfully validated." % gml_file)
    return True
 def getFileElements(self, deliveryDir, delivery_xml_file, schema_file):
     """
     Return the file elements of a delivery METS document if the document validates against its XML schema.

     @type       deliveryDir: string
     @param      deliveryDir: Path to delivery directory
     @type       delivery_xml_file:  string
     @param      delivery_xml_file:  Path to delivery METS file.
     @type       schema_file:  string or None
     @param      schema_file:  Path to the XML schema file; if None, the schema location is taken from the METS file
     @return:    Result of ParsedMets.get_file_elements() if the document is valid; None if it is invalid or an
                 XMLSyntaxError occurred
     """
     # Local import keeps this method self-contained.
     import logging

     try:
         # Parse the XML file, get the root element
         parsed_mets = ParsedMets(deliveryDir)
         parsed_mets.load_mets(delivery_xml_file)
         # If no XSD file was given, extract its location from the XML
         if schema_file is None:
             schema_file = parsed_mets.get_mets_schema_from_schema_location()
         # Parse the XSD file
         parsed_sfile = lxml.etree.parse(schema_file)
         # Validate the delivery XML file
         validation_result = XmlValidation().validate_XML(
             parsed_mets.mets_tree, parsed_sfile)
         # validate_XML returns a result object; its 'valid' attribute carries
         # the outcome (the object itself would be truthy even when invalid).
         if validation_result.valid:
             return parsed_mets.get_file_elements()
     except XMLSyntaxError as why:
         # Previously the message was built and then dropped; log it instead.
         logging.getLogger(__name__).error(
             'Error validating delivery %s, why: %s', delivery_xml_file,
             str(why))
     return None
Exemple #3
0
    def test_log(self):
        """
        Build a PREMIS document from the skeleton file, add an agent, an event and an object, and check that the
        resulting XML validates against the PREMIS 2.2 schema.
        """
        self.test_root = os.path.split(
            os.path.abspath(os.path.dirname(__file__)))[0]
        # Resolve both resources against test_root so that the test does not
        # depend on the current working directory of the test runner.
        resources_dir = os.path.join(self.test_root, '../../earkresources')

        skeleton_path = os.path.join(resources_dir, 'PREMIS_skeleton.xml')
        with open(skeleton_path, 'r') as premis_file:
            self.my_premis = Premis(premis_file)

        self.my_premis.add_agent('Aip2Dip')
        self.my_premis.add_event('Migration01', 'success', 'Aip2Dip')
        self.my_premis.add_object('file.txt')
        premis_xml = pretty_xml_string(self.my_premis.to_string())
        print(premis_xml)

        xmlval = XmlValidation()
        parsed_xml = etree.fromstring(premis_xml)
        parsed_schema = etree.parse(
            os.path.join(resources_dir, 'schemas/premis-v2-2.xsd'))
        validation_result = xmlval.validate_XML(parsed_xml, parsed_schema)
        # Show the validation errors before the assertion fails.
        if validation_result.err:
            print(validation_result.err)
        self.assertTrue(validation_result.valid)
Exemple #4
0
def submit_order(request):
    """
    Create a DIP order from a JSON POST request.

    The request body is expected to be a JSON object such as
    {"order_title": "example title", "aip_identifiers": ["b7738768-032d-3db1-eb42-b09611e6e6c6"]}.
    A DIP and a new InformationPackage (identified by a fresh process id) are created, and every requested AIP is
    attached to the DIP through an Inclusion record. AIP records that do not exist yet are created from the
    corresponding InformationPackage.

    @type       request: HttpRequest
    @param      request: Incoming request
    @rtype:     HttpResponse
    @return:    JSON body {"process_id": <id>, "status": "Submitted"} on success, otherwise
                {"process_id": null, "error": <message>}
    """
    print('received request' + request.method)

    def _error_response(message):
        # All failures share the same JSON shape.
        return HttpResponse(
            json.dumps({
                'process_id': None,
                'error': message
            }))

    if request.method != 'POST':
        return _error_response("Unsupported GET request.")

    # The body is client-supplied: report malformed JSON instead of failing
    # with an unhandled exception.
    try:
        order_json = json.loads(request.body)
    except ValueError as e:
        return _error_response("Invalid JSON in order request: %s" % e)

    for required in ("order_title", "aip_identifiers"):
        if not isinstance(order_json, dict) or required not in order_json:
            return _error_response(
                "Missing %s element in order request." % required)

    order_title = order_json["order_title"]
    aip_identifiers = order_json["aip_identifiers"]

    # verify that all necessary AIPs exist, return error otherwise
    for aip_identifier in aip_identifiers:
        if not InformationPackage.objects.filter(
                identifier=aip_identifier).exists():
            return _error_response(
                "Unknown IP for provided UUID %s" % aip_identifier)

    try:
        dip = DIP.objects.create(name=order_title)
    except Exception as e:
        return _error_response(repr(e))

    process_id = getUniqueID()
    wf = WorkflowModules.objects.get(identifier=AIPtoDIPReset.__name__)
    InformationPackage.objects.create(
        path=os.path.join(config_path_work, process_id),
        uuid=process_id,
        statusprocess=0,
        packagename=order_title,
        last_task=wf)
    # Report the id of the package that was just created (the previous code
    # printed the leftover loop variable of the verification loop).
    print("Created DIP with UUID %s" % process_id)

    for aip_identifier in aip_identifiers:
        if AIP.objects.filter(identifier=aip_identifier).exists():
            aip = AIP.objects.get(identifier=aip_identifier)
        else:
            # entry does not exist in search_aip: add it from
            # earkweb_informationpackage
            ip = InformationPackage.objects.get(identifier=aip_identifier)
            aip = AIP.objects.create(identifier=aip_identifier,
                                     cleanid="",
                                     source=ip.storage_loc,
                                     date_selected=timezone.now())
        Inclusion(aip=aip, dip=dip).save()
        print("Added existing package %s" % aip_identifier)

    return HttpResponse(
        json.dumps({
            "process_id": process_id,
            'status': 'Submitted'
        }))
    def validate_delivery(self, deliveryDir, delivery_xml_file, schema_file,
                          package_file):
        """
        Validate the delivery METS document. Does XML validation of the delivery METS file, a checksum check of the
        package file against the first file element of the METS document, and a file size validation.

        @type       deliveryDir: string
        @param      deliveryDir: Path to delivery directory
        @type       delivery_xml_file:  string
        @param      delivery_xml_file:  Path to delivery METS file.
        @type       schema_file:  string or None
        @param      schema_file:  Path to the XML schema file; if None, the schema location is taken from the METS file
        @type       package_file:  string
        @param      package_file:  Path to package file (e.g. TAR).
        @rtype:     ValidationResult
        @return:    Validation result (validity, process log, error log)
        """
        log = [
            "Validating delivery: %s using schema: %s and package file %s" %
            (delivery_xml_file, schema_file, package_file)
        ]
        err = []

        try:
            # Parse the XML file, get the root element
            parsed_mets = ParsedMets(deliveryDir)
            parsed_mets.load_mets(delivery_xml_file)
            # If no XSD file was given, extract its location from the XML
            if schema_file is None:
                schema_file = parsed_mets.get_mets_schema_from_schema_location()
            # Parse the XSD file
            parsed_sfile = lxml.etree.parse(schema_file)

            # Validate the delivery XML file
            xml_result = XmlValidation().validate_XML(parsed_mets.mets_tree,
                                                      parsed_sfile)

            # Checksum validation: expected value and algorithm are both read
            # from the first file element of the METS document.
            first_file_element = parsed_mets.get_first_file_element()
            checksum_expected = ParsedMets.get_file_element_checksum(
                first_file_element)
            checksum_algorithm = ParsedMets.get_file_element_checksum_algorithm(
                first_file_element)
            valid_checksum = ChecksumValidation().validate_checksum(
                package_file, checksum_expected,
                ChecksumAlgorithm.get(checksum_algorithm))

            # Mets validation
            files_size_result = MetsValidation(
                parsed_mets).validate_files_size()

            # Merge the logs of the individual checks into the overall result.
            log += xml_result.log
            err += xml_result.err

            log += files_size_result.log
            err += files_size_result.err

            log.append("Checksum validity: \"%s\"" % str(valid_checksum))

            valid = (xml_result.valid and valid_checksum
                     and files_size_result.valid)

            return ValidationResult(valid, log, err)

        except XMLSyntaxError as why:
            # An XML syntax error is reported as an invalid result rather
            # than propagated to the caller.
            err.append('Error validating delivery %s, why: %s' %
                       (delivery_xml_file, str(why)))
            return ValidationResult(False, log, err)
Exemple #6
0
def validate_ead_metadata(root_path, pattern, schema_file, tl):
    """
    Validate every EAD metadata file found below a root directory against an XML schema and check that all files
    referenced by its 'dao' elements exist.

    The files are located with locate(pattern, root_path) and processed one after the other; processing stops at
    the first file that fails schema validation or contains a broken file reference. If schema_file is None, no
    explicit schema is passed to the validator (the schemas referenced by the file's 'schemaLocation' attribute are
    reported as being used).

    @type       root_path: string
    @param      root_path: Root directory
    @type       pattern:  string
    @param      pattern:  pattern to search metadata
    @type       schema_file:  string or None
    @param      schema_file:  Path to the XML schema file, or None to rely on the 'schemaLocation' attribute
    @type       tl:  workers.TaskLogger
    @param      tl:  workers.TaskLogger
    @rtype:     bool
    @return:    Validity of EAD metadata (True also when no file was located)
    """
    # EAD3 namespaces. Note: an EAD 2002 variant would additionally need
    # 'xlink': 'http://www.w3.org/1999/xlink'.
    ns = {
        'ead': 'http://ead3.archivists.org/schema/',
        'xsi': 'http://www.w3.org/2001/XMLSchema-instance'
    }
    xmlval = XmlValidation()
    for ead in locate(pattern, root_path):
        logger.debug("Validating EAD metadata file: %s" %
                     strip_prefixes(ead, root_path))
        # validate against xml schema
        result = xmlval.validate_XML_by_path(ead, schema_file)
        if schema_file is None:
            tl.addinfo(
                "Using schema files specified by the 'schemaLocation' attribute"
            )
        else:
            # The format string needs a conversion specifier; without one the
            # '%' operator raises TypeError as soon as a schema file is given.
            tl.addinfo("Using schema: %s" % schema_file)

        if not result.valid:
            if schema_file is None:
                tl.adderr(
                    "Error validating against schemas using schema files specified by the 'schemaLocation' attribute:"
                )
            else:
                tl.adderr("Error validating against schema '%s': %s" %
                          (schema_file, result.err))
            # Report every individual validation error on its own line.
            for err in result.err:
                tl.adderr("- %s" % str(err))
            return False
        logger.debug("Metadata file '%s' successfully validated." % ead)

        # check dao hrefs
        ead_tree = lxml.etree.parse(ead)
        daos = ead_tree.getroot().xpath('//ead:dao', namespaces=ns)
        if not daos:
            tl.addinfo("The EAD file does not contain any file references.")

        # References are resolved relative to the directory of the EAD file.
        ead_dir = os.path.dirname(ead)
        references_valid = True
        for dao in daos:
            # Note: in EAD 2002 the attribute would be
            # '{http://www.w3.org/1999/xlink}href' instead of 'href'.
            dao_ref_file = os.path.join(ead_dir,
                                        remove_protocol(dao.attrib['href']))
            if not os.path.exists(dao_ref_file):
                references_valid = False
                # NOTE(review): the meaning of the second argument (False) is
                # defined by TaskLogger.adderr - confirm against that class.
                tl.adderr(
                    "DAO file reference error - File does not exist: %s" %
                    dao_ref_file, False)
        if not references_valid:
            tl.adderr(
                "DAO file reference errors. Please consult the log file for details."
            )
            return False
    return True