コード例 #1
0
class FormatValidator(BaseValidator):
    """
    Validates the format of a file against an expected format.

    The expected format is passed to ``validate`` as a
    ``(name, version, registry key)`` triple. Members that are falsy are not
    compared against the identified format.
    """

    def __init__(self, *args, **kwargs):
        super(FormatValidator, self).__init__(*args, **kwargs)

        # Forwarded to the identifier as ``allow_unknown_file_types``;
        # defaults to False when the option is absent.
        allow_unknown = self.options.get('allow_unknown_file_types', False)
        self.fid = FormatIdentifier(allow_unknown_file_types=allow_unknown)

    def validate(self, filepath, expected=None):
        """
        Identify the format of ``filepath`` and compare it with ``expected``.

        A ``Validation`` object is created before the check and is saved with
        the outcome (``time_done``, ``passed`` and ``message``) afterwards,
        whether or not the validation succeeded.

        Args:
            filepath: Path of the file to identify.
            expected: A ``(name, version, registry key)`` triple. At least one
                member must not be ``None``. ``None`` for the whole argument
                is treated as a triple of ``None`` values.

        Raises:
            ValueError: If no expected name, version or registry key is given.
            ValidationError: If an expected (truthy) value differs from the
                identified one.
        """
        logger.debug('Validating format of %s', filepath)

        # The default of ``None`` cannot be unpacked; treat it as "nothing
        # expected" so the caller gets the descriptive ValueError below.
        if expected is None:
            expected = (None, None, None)

        name, version, reg_key = expected
        if all(f is None for f in (name, version, reg_key)):
            raise ValueError(
                'At least one of name, version and registry key is required')

        val_obj = Validation.objects.create(
            filename=filepath,
            time_started=timezone.now(),
            validator=self.__class__.__name__,
            required=self.required,
            task=self.task,
            information_package=self.ip,
            responsible=self.responsible,
            specification={
                'context': self.context,
                'options': self.options,
            },
        )

        passed = False
        try:
            actual_name, actual_version, actual_reg_key = self.fid.identify_file_format(
                filepath)
            if name and name != actual_name:
                raise ValidationError(
                    "format name for {} is not valid, ({} != {})".format(
                        filepath, name, actual_name))
            if version and version != actual_version:
                raise ValidationError(
                    "format version for {} is not valid, ({} != {})".format(
                        filepath, version, actual_version))
            if reg_key and reg_key != actual_reg_key:
                raise ValidationError(
                    "format registry key for {} is not valid, ({} != {})".
                    format(filepath, reg_key, actual_reg_key))

            passed = True
        except ValidationError:
            # Keep the full traceback on the validation record, then let the
            # caller handle the failure.
            val_obj.message = traceback.format_exc()
            raise
        else:
            message = 'Successfully validated format of %s' % filepath
            val_obj.message = message
            logger.info(message)
        finally:
            # Runs on success and on failure so the record is always closed.
            val_obj.time_done = timezone.now()
            val_obj.passed = passed
            val_obj.save(update_fields=['time_done', 'passed', 'message'])
コード例 #2
0
    def setUp(self):
        """Create a temporary ``.txt`` file and record its identified format.

        The file is written and closed before identification, and its removal
        is registered as a test cleanup.
        """
        self.content = b'test file'

        # ``delete=False`` keeps the file on disk after it is closed; the
        # ``with`` block guarantees the handle is closed even if the write
        # fails, and closing flushes the written content.
        with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as tmp:
            self.test_file = tmp
            self.addCleanup(os.remove, tmp.name)
            tmp.write(self.content)

        fid = FormatIdentifier()
        self.expected = fid.identify_file_format(self.test_file.name)
コード例 #3
0
ファイル: ingest.py プロジェクト: ESSolutions/ESSArch
def index_document(tag_version, filepath):
    """
    Build and save a ``File`` document for ``tag_version`` from ``filepath``.

    File metadata (extension, directory, relative href, size, modification
    time and identified format) is stored in ``tag_version.custom_fields``.
    The file content is base64-encoded and saved through the
    ``ingest_attachment`` pipeline unless the identified format registry key
    is listed in ``settings.EXCLUDE_FILE_FORMAT_FROM_INDEXING_CONTENT``.

    Returns:
        A ``(doc, tag_version)`` tuple.

    Raises:
        ElasticsearchException: Logged and re-raised if saving the document
            fails.
    """
    excluded_formats = settings.EXCLUDE_FILE_FORMAT_FROM_INDEXING_CONTENT

    identifier = FormatIdentifier()
    format_name, format_version, format_registry_key = identifier.identify_file_format(filepath)
    should_index_content = format_registry_key not in excluded_formats

    package = tag_version.tag.information_package
    # splitext keeps the leading dot on the suffix; strip it
    suffix = os.path.splitext(tag_version.name)[1]
    extension = suffix[1:]
    dirname = os.path.dirname(filepath)
    href = normalize_path(os.path.relpath(dirname, package.object_path))
    if href == '.':
        # The file sits directly in the package root
        href = ''
    size, _count = get_tree_size_and_count(filepath)
    modified = timestamp_to_datetime(os.stat(filepath).st_mtime)

    tag_version.custom_fields = {
        'extension': extension,
        'dirname': dirname,
        'href': href,
        'filename': tag_version.name,
        'size': size,
        'modified': modified,
        'formatname': format_name,
        'formatversion': format_version,
        'formatkey': format_registry_key,
    }

    doc = File.from_obj(tag_version)

    try:
        if not should_index_content:
            logger.debug('Skip to index file content for {}'.format(filepath))
            doc.save()
        else:
            with open(filepath, 'rb') as stream:
                raw = stream.read()
            doc.data = base64.b64encode(raw).decode("ascii")
            doc.save(pipeline='ingest_attachment')
    except ElasticsearchException:
        logger.exception('Failed to index {}'.format(filepath))
        raise

    return doc, tag_version
コード例 #4
0
def validate_file_format(filename, format_name, format_registry_key,
                         format_version):
    """
    Validates the format of the given file

    Each expected value that is truthy is compared with the value reported by
    ``FormatIdentifier.identify_file_format``; falsy expected values are
    skipped.

    Args:
        filename: Path of the file to identify.
        format_name: Expected format name, or a falsy value to skip.
        format_registry_key: Expected registry key, or a falsy value to skip.
        format_version: Expected format version, or a falsy value to skip.

    Returns:
        The string ``"Success"`` when every requested comparison matches.

    Raises:
        AssertionError: If a requested comparison does not match. Raised
            explicitly rather than with ``assert`` so the checks are not
            removed when Python runs with ``-O``.
    """

    fid = FormatIdentifier()
    actual_format_name, actual_format_version, actual_format_registry_key = fid.identify_file_format(
        filename)

    if format_name and actual_format_name != format_name:
        # All three placeholders must be filled from a single tuple
        raise AssertionError(
            "format name for %s is not valid, (%s != %s)" % (
                filename, format_name, actual_format_name))

    if format_version and actual_format_version != format_version:
        raise AssertionError(
            "format version for %s is not valid" % filename)

    if format_registry_key and actual_format_registry_key != format_registry_key:
        raise AssertionError(
            "format registry key for %s is not valid" % filename)

    return "Success"
コード例 #5
0
 def cli(path):
     """Identify the format of the file at ``path`` and echo the result."""
     identified = FormatIdentifier().identify_file_format(path)
     click.echo(identified)