Example #1
0
    def __call__(self, validate_all=False, data=None):
        """Deserialize a JSON payload onto the mail, converting .msg uploads.

        When ``data`` is None the payload is read from the request with
        ``json_body``. If the payload contains a ``message`` whose filename
        ends in ``.msg`` (case-insensitive), the deserialized file is stored
        on ``self.context.original_message``, run through
        ``Msg2MimeTransform`` and ``data['message']`` is replaced by the
        resulting ``message/rfc822`` payload with an ``.eml`` filename.
        The (possibly rewritten) data is then handed to the parent
        deserializer, after which the title and metadata handlers are
        invoked on the returned object.

        :param validate_all: passed through to the parent deserializer.
        :param data: already-parsed payload dict, or None to read the request.
        :returns: the object returned by the parent deserializer.
        """
        if data is None:
            data = json_body(self.request)

        if 'message' in data:
            field = IMail['message']
            deserializer = queryMultiAdapter(
                (field, self.context, self.request), IFieldDeserializer)
            message = deserializer(data['message'])

            # A deserialized file may carry no filename at all; treat that
            # as "not a .msg upload" instead of failing on ``None.lower()``.
            filename = getattr(message, 'filename', None) or ''
            if message and filename.lower().endswith('.msg'):
                # Keep the untouched upload before swapping in the converted
                # version.
                self.context.original_message = message
                transform = Msg2MimeTransform()
                eml = transform.transform(message.data)
                data['message'] = {
                    'data': eml,
                    'content-type': 'message/rfc822',
                    # Drop the trailing "msg" and keep the dot: x.msg -> x.eml
                    'filename': filename[:-3] + 'eml',
                }

        context = super(DeserializeMailFromJson, self).__call__(
            validate_all=validate_all, data=data)

        context._update_title_from_message_subject()
        initialize_metadata(context, None)
        initalize_title(context, None)
        return context
Example #2
0
    def __call__(self, validate_all=False, data=None, create=False):
        """Deserialize a JSON payload onto the mail, converting .msg uploads.

        When ``data`` is None the payload is read from the request with
        ``json_body``. The parent deserializer runs first; if the resulting
        ``context.message`` has a filename ending in ``.msg``
        (case-insensitive), that file is stored on
        ``self.context.original_message`` and ``context.message`` is replaced
        by a ``NamedBlobFile`` holding the ``Msg2MimeTransform`` output
        (``message/rfc822``, ``.eml`` filename).

        Only when ``create`` is true and the payload contains ``message`` are
        the metadata handlers fired; the title is derived from the message
        only if the payload carries no truthy ``title``.

        :param validate_all: passed through to the parent deserializer.
        :param data: already-parsed payload dict, or None to read the request.
        :param create: passed through to the parent deserializer; also gates
            the title/metadata initialization.
        :returns: the object returned by the parent deserializer.
        """
        if data is None:
            data = json_body(self.request)

        context = super(DeserializeMailFromJson, self).__call__(
            validate_all=validate_all, data=data, create=create)

        message = context.message
        # A stored file may carry no filename at all; treat that as "not a
        # .msg upload" instead of failing on ``None.lower()``.
        filename = getattr(message, 'filename', None) or ''
        if message and filename.lower().endswith('.msg'):
            # Keep the untouched upload before swapping in the converted
            # version.
            self.context.original_message = message
            transform = Msg2MimeTransform()
            eml = transform.transform(message.data)
            context.message = NamedBlobFile(
                data=eml,
                # Drop the trailing "msg" and keep the dot: x.msg -> x.eml
                filename=filename[:-3] + 'eml',
                contentType='message/rfc822')

        if create and 'message' in data:
            if not data.get('title'):
                context._update_title_from_message_subject()
                initalize_title(context, None)

            initialize_metadata(context, None)

        return context
Example #3
0
 def run_after_creation_jobs(self, item, obj):
     """Re-run the creation handlers for ``obj`` by hand.

     They already fired once, but too early: the file contents had not
     been loaded at that point.
     """
     if not self.is_mail(item):
         # Plain documents: sync title/filename and flag availability.
         sync_title_and_filename_handler(obj, None)
         set_digitally_available(obj, None)
         return

     initialize_metadata(obj, None)
     if obj.title == NO_SUBJECT_TITLE_FALLBACK:
         # Clear the [No Subject] placeholder so the title is derived again.
         obj.title = None
         initalize_title(obj, None)
 def run_after_creation_jobs(self, item, obj):
     """Manually fire the handlers that ran before the file was loaded.

     Mails get their metadata initialized and, if they still carry the
     [No Subject] placeholder, their title re-initialized. Everything else
     gets the title/filename sync and the digitally-available handler.
     """
     if self.is_mail(item):
         initialize_metadata(obj, None)
         still_placeholder = obj.title == NO_SUBJECT_TITLE_FALLBACK
         if still_placeholder:
             # Reset the placeholder before the title handler runs again.
             obj.title = None
             initalize_title(obj, None)
         return

     sync_title_and_filename_handler(obj, None)
     set_digitally_available(obj, None)
Example #5
0
    def __iter__(self):
        """Yield every item from ``self.previous``, attaching its file.

        For items that carry ``self.key`` (a file path relative to
        ``self.bundle_path``), the file is read and stored on
        ``item['_object']`` as the value of ``IMail['message']`` (mails) or
        ``IDocumentSchema['file']`` (everything else). Afterwards the
        title/metadata handlers are fired manually.

        Every item is yielded exactly once, whether or not a file could be
        attached. Problems are logged as warnings; ``.msg`` files, missing
        files and I/O errors are additionally recorded under
        ``self.stats['errors']``.
        """
        for item in self.previous:
            # Evaluated once; reused for the field choice and the handlers.
            is_mail = self.is_mail(item)
            if is_mail:
                file_field = IMail['message']
            else:
                file_field = IDocumentSchema['file']

            keys = item.keys()
            pathkey = self.pathkey(*keys)[0]

            if self.key not in item:
                yield item
                continue

            filepath = item[self.key]
            if filepath is None:
                yield item
                continue

            if pathkey not in item:
                logger.warning("Missing path key for file %s", filepath)
                yield item
                continue
            path = item[pathkey]

            filepath = os.path.join(self.bundle_path, filepath)
            filename = os.path.basename(filepath)

            # TODO: Check for this in OGGBundle validation
            # Compare case-insensitively so that "FOO.MSG" is skipped too.
            if filepath.lower().endswith(u'.msg'):
                logger.warning("Skipping .msg file: %s", filepath)
                self.stats['errors']['msgs'][filepath] = path
                yield item
                continue

            # TODO: Check for this in OGGBundle validation
            if not os.path.exists(filepath):
                logger.warning("File not found: %s", filepath)
                self.stats['errors']['files_not_found'][filepath] = path
                yield item
                continue

            mimetype, _encoding = guess_type(filepath, strict=False)
            if mimetype is None:
                logger.warning("Unknown mimetype for file %s", filepath)
                mimetype = 'application/octet-stream'

            obj = item.get('_object')
            if obj is None:
                logger.warning(
                    "Cannot set file. Document %s doesn't exist.", path)
                yield item
                continue

            try:
                with open(filepath, 'rb') as f:
                    namedblobfile = file_field._type(
                        data=f.read(),
                        contentType=mimetype,
                        filename=filename)
                    setattr(obj, file_field.getName(), namedblobfile)
            except EnvironmentError as e:
                # TODO: Check for this in OGGBundle validation
                logger.warning("Can't open file %s. %s.", filepath, str(e))
                self.stats['errors']['files_io_errors'][filepath] = path
                yield item
                continue

            # Fire these event handlers manually because they got fired
            # too early before (when the file contents weren't loaded yet)
            if is_mail:
                initialize_metadata(obj, None)
                # Reset the [No Subject] placeholder
                obj.title = None
                initalize_title(obj, None)
            else:
                sync_title_and_filename_handler(obj, None)
                set_digitally_available(obj, None)

            yield item
Example #6
0
    def __iter__(self):
        """Yield every item from ``self.previous``, attaching its file.

        For items that carry ``self.key``, the path is resolved with
        ``self.build_absolute_filepath``, the file is read and stored on
        ``item['_object']`` as the value of ``IMail['message']`` (mails) or
        ``IDocumentSchema['file']`` (everything else). Afterwards the
        title/metadata handlers are fired manually.

        Every item is yielded exactly once, whether or not a file could be
        attached. Problems are logged as warnings; unresolvable paths,
        invalid file types, missing files and I/O errors are additionally
        appended to the matching list in ``self.bundle.errors`` as tuples
        starting with the item's ``guid``.
        """
        for item in self.previous:
            guid = item['guid']

            # Evaluated once; reused for the field choice and the handlers.
            is_mail = self.is_mail(item)
            if is_mail:
                file_field = IMail['message']
            else:
                file_field = IDocumentSchema['file']

            keys = item.keys()
            pathkey = self.pathkey(*keys)[0]

            if self.key not in item:
                yield item
                continue

            _filepath = item[self.key]
            if _filepath is None:
                yield item
                continue

            if pathkey not in item:
                logger.warning("Missing path key for file %s", _filepath)
                yield item
                continue
            path = item[pathkey]

            abs_filepath = self.build_absolute_filepath(_filepath)
            if abs_filepath is None:
                logger.warning('Unresolvable filepath: %s', _filepath)
                error = (guid, _filepath, path)
                self.bundle.errors['files_unresolvable_path'].append(error)
                yield item
                continue

            filename = os.path.basename(abs_filepath)
            # ``bytes`` is ``str`` on Python 2, so this is unchanged there,
            # and it no longer calls ``.decode`` on text under Python 3.
            if isinstance(filename, bytes):
                filename = filename.decode('utf8')

            # TODO: Check for this in OGGBundle validation
            if abs_filepath.lower().endswith(tuple(INVALID_FILE_EXTENSIONS)):
                logger.warning("Skipping file with invalid type: %s",
                               abs_filepath)
                error = (guid, abs_filepath, path)
                self.bundle.errors['files_invalid_types'].append(error)
                yield item
                continue

            # TODO: Check for this in OGGBundle validation
            if not os.path.exists(abs_filepath):
                logger.warning("File not found: %s", abs_filepath)
                error = (guid, abs_filepath, path)
                self.bundle.errors['files_not_found'].append(error)
                yield item
                continue

            mimetype, _encoding = guess_type(abs_filepath, strict=False)
            if mimetype is None:
                logger.warning("Unknown mimetype for file %s", abs_filepath)
                mimetype = 'application/octet-stream'

            obj = item.get('_object')
            if obj is None:
                logger.warning(
                    "Cannot set file. Document %s doesn't exist.", path)
                yield item
                continue

            try:
                with open(abs_filepath, 'rb') as f:
                    namedblobfile = file_field._type(data=f.read(),
                                                     contentType=mimetype,
                                                     filename=filename)
                    setattr(obj, file_field.getName(), namedblobfile)
            except EnvironmentError as e:
                # TODO: Check for this in OGGBundle validation
                logger.warning("Can't open file %s. %s.",
                               abs_filepath, str(e))
                error = (guid, abs_filepath, str(e), path)
                self.bundle.errors['files_io_errors'].append(error)
                yield item
                continue

            # Fire these event handlers manually because they got fired
            # too early before (when the file contents weren't loaded yet)
            if is_mail:
                initialize_metadata(obj, None)

                if obj.title == NO_SUBJECT_TITLE_FALLBACK:
                    # Reset the [No Subject] placeholder
                    obj.title = None
                    initalize_title(obj, None)
            else:
                sync_title_and_filename_handler(obj, None)
                set_digitally_available(obj, None)

            yield item