Python IngestEmailError.emailParseErrorの例

プログラミング言語: Python

名前空間/パッケージ名: superdesk.errors

クラス/型: IngestEmailError

メソッド/関数: emailParseError

hotexamples.comのコード掲載数: 10

Python IngestEmailError.emailParseError - 10件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsuperdesk.errors.IngestEmailError.emailParseErrorの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

emailError(13)

emailParseError(6)

emailLoginError(5)

emailFilterError(3)

emailMailboxError(3)

emailHostError(2)

notConfiguredError(1)

コード例 #1

ファイルを表示

    def parse(self, data, provider=None):
        config = provider.get('config', {})
        # If the channel is configured to process structured email generated from a google form
        if config.get('formatted', False):
            return self._parse_formatted_email(data, provider)
        try:
            new_items = []
            # create an item for the body text of the email
            # either text or html
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()

            comp_item = None

            # a list to keep the references to the attachments
            refs = []

            html_body = None
            text_body = None

            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    item['headline'] = self.parse_header(msg['subject'])
                    field_from = self.parse_header(msg['from'])
                    item['original_source'] = field_from
                    try:
                        if email_regex.findall(field_from):
                            email_address = email_regex.findall(field_from)[0]
                            user = get_resource_service(
                                'users').get_user_by_email(email_address)
                            item['original_creator'] = user[
                                eve.utils.config.ID_FIELD]
                    except UserNotRegisteredException:
                        pass
                    item['guid'] = msg['Message-ID']
                    date_tuple = email.utils.parsedate_tz(msg['Date'])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(
                            email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone('utc'))
                        item['firstcreated'] = dt

                    # this will loop through all the available multiparts in mail
                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            try:
                                # if we don't know the charset just have a go!
                                if part.get_content_charset() is None:
                                    text_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    text_body = body.decode(charset)
                                continue
                            except Exception as ex:
                                logger.exception(
                                    "Exception parsing text body for {0} from {1}: {2}"
                                    .format(item['headline'], field_from, ex))
                                continue
                        if part.get_content_type() == "text/html":
                            body = part.get_payload(decode=True)
                            try:
                                if part.get_content_charset() is None:
                                    html_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    html_body = body.decode(charset)
                                html_body = self.safe_html(html_body)
                                continue
                            except Exception as ex:
                                logger.exception(
                                    "Exception parsing html body for {0} from {1}: {2}"
                                    .format(item['headline'], field_from, ex))
                                continue
                        if part.get_content_maintype() == 'multipart':
                            continue
                        if part.get('Content-Disposition') is None:
                            continue
                        # we are only going to pull off image attachments at this stage
                        if part.get_content_maintype() != 'image':
                            continue

                        fileName = part.get_filename()
                        if bool(fileName):
                            image = part.get_payload(decode=True)
                            content = io.BytesIO(image)
                            res = process_file_from_stream(
                                content, part.get_content_type())
                            file_name, content_type, metadata = res
                            if content_type == 'image/gif' or content_type == 'image/png':
                                continue
                            content.seek(0)
                            image_id = self.parser_app.media.put(
                                content,
                                filename=fileName,
                                content_type=content_type,
                                metadata=metadata)
                            renditions = {'baseImage': {'href': image_id}}

                            # if we have not got a composite item then create one
                            if not comp_item:
                                comp_item = dict()
                                comp_item[ITEM_TYPE] = CONTENT_TYPE.COMPOSITE
                                comp_item['guid'] = generate_guid(
                                    type=GUID_TAG)
                                comp_item['versioncreated'] = utcnow()
                                comp_item['groups'] = []
                                comp_item['headline'] = item['headline']
                                comp_item['groups'] = []
                                comp_item['original_source'] = item[
                                    'original_source']
                                if 'original_creator' in item:
                                    comp_item['original_creator'] = item[
                                        'original_creator']

                                # create a reference to the item that stores the body of the email
                                item_ref = {
                                    'guid': item['guid'],
                                    'residRef': item['guid'],
                                    'headline': item['headline'],
                                    'location': 'ingest',
                                    'itemClass': 'icls:text',
                                    'original_source': item['original_source']
                                }
                                if 'original_creator' in item:
                                    item_ref['original_creator'] = item[
                                        'original_creator']
                                refs.append(item_ref)

                            media_item = dict()
                            media_item['guid'] = generate_guid(type=GUID_TAG)
                            media_item['versioncreated'] = utcnow()
                            media_item[ITEM_TYPE] = CONTENT_TYPE.PICTURE
                            media_item['renditions'] = renditions
                            media_item['mimetype'] = content_type
                            set_filemeta(media_item, metadata)
                            media_item['slugline'] = fileName
                            if text_body is not None:
                                media_item['body_html'] = text_body
                            media_item['headline'] = item['headline']
                            media_item['original_source'] = item[
                                'original_source']
                            if 'original_creator' in item:
                                media_item['original_creator'] = item[
                                    'original_creator']
                            new_items.append(media_item)

                            # add a reference to this item in the composite item
                            media_ref = {
                                'guid': media_item['guid'],
                                'residRef': media_item['guid'],
                                'headline': fileName,
                                'location': 'ingest',
                                'itemClass': 'icls:picture',
                                'original_source': item['original_source']
                            }
                            if 'original_creator' in item:
                                media_ref['original_creator'] = item[
                                    'original_creator']
                            refs.append(media_ref)

            if html_body is not None:
                item['body_html'] = html_body
            else:
                item['body_html'] = '<pre>' + text_body + '</pre>'
                item[FORMAT] = FORMATS.PRESERVED

            # if there is composite item then add the main group and references
            if comp_item:
                grefs = {
                    'refs': [{
                        'idRef': 'main'
                    }],
                    'id': 'root',
                    'role': 'grpRole:NEP'
                }
                comp_item['groups'].append(grefs)

                grefs = {'refs': refs, 'id': 'main', 'role': 'grpRole:Main'}
                comp_item['groups'].append(grefs)

                new_items.append(comp_item)

            new_items.append(item)
            return new_items
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)

コード例 #2

ファイルを表示

ファイル: rfc822.py プロジェクト: girgen79/superdesk

    def parse_email(self, data, provider):
        try:
            new_items = []
            # create an item for the body text of the email
            # either text or html
            item = dict()
            item['type'] = 'text'
            item['versioncreated'] = utcnow()

            comp_item = None

            # a list to keep the references to the attachments
            refs = []

            html_body = None
            text_body = None

            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    item['headline'] = self.parse_header(msg['subject'])
                    item['original_creator'] = self.parse_header(msg['from'])
                    item['guid'] = msg['Message-ID']
                    date_tuple = email.utils.parsedate_tz(msg['Date'])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(
                            email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone('utc'))
                        item['firstcreated'] = dt

                    # this will loop through all the available multiparts in mail
                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            try:
                                # if we don't know the charset just have a go!
                                if part.get_content_charset() is None:
                                    text_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    text_body = body.decode(charset)
                                continue
                            except Exception as ex:
                                logger.exception(
                                    "Exception parsing text body for {0} from {1}"
                                    .format(item['headline'],
                                            item['original_creator']), ex)
                                continue
                        if part.get_content_type() == "text/html":
                            body = part.get_payload(decode=True)
                            try:
                                if part.get_content_charset() is None:
                                    html_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    html_body = body.decode(charset)
                                html_body = self.safe_html(html_body)
                                continue
                            except Exception as ex:
                                logger.exception(
                                    "Exception parsing text html for {0} from {1}"
                                    .format(item['headline'],
                                            item['original_creator']), ex)
                                continue
                        if part.get_content_maintype() == 'multipart':
                            continue
                        if part.get('Content-Disposition') is None:
                            continue
                        # we are only going to pull off image attachments at this stage
                        if part.get_content_maintype() != 'image':
                            continue

                        fileName = part.get_filename()
                        if bool(fileName):
                            image = part.get_payload(decode=True)
                            content = io.BytesIO(image)
                            res = process_file_from_stream(
                                content, part.get_content_type())
                            file_name, content_type, metadata = res
                            if content_type == 'image/gif' or content_type == 'image/png':
                                continue
                            content.seek(0)
                            image_id = self.parser_app.media.put(
                                content,
                                filename=fileName,
                                content_type=content_type,
                                metadata=metadata)
                            renditions = {'baseImage': {'href': image_id}}

                            # if we have not got a composite item then create one
                            if not comp_item:
                                comp_item = dict()
                                comp_item['type'] = 'composite'
                                comp_item['guid'] = generate_guid(
                                    type=GUID_TAG)
                                comp_item['versioncreated'] = utcnow()
                                comp_item['groups'] = []
                                comp_item['headline'] = item['headline']
                                comp_item['groups'] = []

                                # create a reference to the item that stores the body of the email
                                item_ref = {}
                                item_ref['guid'] = item['guid']
                                item_ref['residRef'] = item['guid']
                                item_ref['headline'] = item['headline']
                                item_ref['location'] = 'ingest'
                                item_ref['itemClass'] = 'icls:text'
                                refs.append(item_ref)

                            media_item = dict()
                            media_item['guid'] = generate_guid(type=GUID_TAG)
                            media_item['versioncreated'] = utcnow()
                            media_item['type'] = 'picture'
                            media_item['renditions'] = renditions
                            media_item['mimetype'] = content_type
                            media_item['filemeta'] = metadata
                            media_item['slugline'] = fileName
                            if text_body is not None:
                                media_item['body_html'] = text_body
                            media_item['headline'] = item['headline']
                            new_items.append(media_item)

                            # add a reference to this item in the composite item
                            media_ref = {}
                            media_ref['guid'] = media_item['guid']
                            media_ref['residRef'] = media_item['guid']
                            media_ref['headline'] = fileName
                            media_ref['location'] = 'ingest'
                            media_ref['itemClass'] = 'icls:picture'
                            refs.append(media_ref)

            if html_body is not None:
                item['body_html'] = html_body
            else:
                item['body_html'] = text_body
                item['type'] = 'preformatted'

            # if there is composite item then add the main group and references
            if comp_item:
                grefs = {}
                grefs['refs'] = [{'idRef': 'main'}]
                grefs['id'] = 'root'
                grefs['role'] = 'grpRole:NEP'
                comp_item['groups'].append(grefs)

                grefs = {}
                grefs['refs'] = refs
                grefs['id'] = 'main'
                grefs['role'] = 'grpRole:Main'
                comp_item['groups'].append(grefs)

                new_items.append(comp_item)

            new_items.append(item)
            return new_items
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)

コード例 #3

ファイルを表示

    def _parse_formatted_email(self, data, provider):
        """Construct an item from an email that was constructed as a notification from a google form submission.

        The google form submits to a google sheet, this sheet creates the email as a notification

        :param data:
        :param provider:
        :return: A list of 1 item
        """
        try:
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()
            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    # Check that the subject line matches what we expect, ignore it if not
                    if self.parse_header(
                            msg['subject']) != 'Formatted Editorial Story':
                        return []

                    item['guid'] = msg['Message-ID']
                    date_tuple = email.utils.parsedate_tz(msg['Date'])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(
                            email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone('utc'))
                        item['firstcreated'] = dt

                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            # if we don't know the charset just have a go!
                            if part.get_content_charset() is None:
                                json_str = body.decode().replace('\r\n',
                                                                 '').replace(
                                                                     '  ', ' ')
                            else:
                                charset = part.get_content_charset()
                                json_str = body.decode(charset).replace(
                                    '\r\n', '').replace('  ', ' ')

                            mail_item = dict(
                                (k, v[0])
                                for k, v in json.loads(json_str).items())

                            self._expand_category(item, mail_item)

                            item['original_source'] = mail_item.get(
                                'Username', '')
                            item['headline'] = mail_item.get('Headline', '')
                            item['abstract'] = mail_item.get('Abstract', '')
                            item['slugline'] = mail_item.get('Slugline', '')
                            item['body_html'] = '<p>' + mail_item.get(
                                'Body', '').replace('\n', '</p><p>') + '</p>'

                            default_source = app.config.get(
                                'DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES')
                            city = mail_item.get('Dateline', '')
                            cities = app.locators.find_cities()
                            located = [
                                c for c in cities
                                if c['city'].lower() == city.lower()
                            ]
                            item.setdefault('dateline', {})
                            item['dateline']['located'] = located[0] if len(
                                located) > 0 else {
                                    'city_code': city,
                                    'city': city,
                                    'tz': 'UTC',
                                    'dateline': 'city'
                                }
                            item['dateline']['source'] = default_source
                            item['dateline'][
                                'text'] = format_dateline_to_locmmmddsrc(
                                    item['dateline']['located'],
                                    get_date(item['firstcreated']),
                                    source=default_source)

                            if mail_item.get('Priority') != '':
                                if mail_item.get('Priority', '3').isdigit():
                                    item['priority'] = int(
                                        mail_item.get('Priority', '3'))
                                else:
                                    priority_map = superdesk.get_resource_service(
                                        'vocabularies').find_one(
                                            req=None, _id='priority')
                                    priorities = [
                                        x
                                        for x in priority_map.get('items', [])
                                        if x['name'].upper() == mail_item.get(
                                            'Priority', '').upper()
                                    ]
                                    if priorities is not None and len(
                                            priorities) > 0:
                                        item['priority'] = int(
                                            priorities[0].get('qcode', '3'))
                                    else:
                                        item['priority'] = 3
                            if mail_item.get('News Value') != '':
                                item['urgency'] = int(
                                    mail_item.get('News Value', '3'))

                            # We expect the username passed corresponds to a superdesk user
                            query = {
                                'email':
                                re.compile(
                                    '^{}$'.format(mail_item.get('Username')),
                                    re.IGNORECASE)
                            }
                            user = superdesk.get_resource_service(
                                'users').find_one(req=None, **query)
                            if not user:
                                logger.error(
                                    'Failed to find user for email {}'.format(
                                        mail_item.get('Username')))
                                raise UserNotRegisteredException()
                            item['original_creator'] = user.get('_id')
                            if BYLINE in user and user.get(BYLINE, ''):
                                item['byline'] = user.get(BYLINE)
                            item[SIGN_OFF] = user.get(SIGN_OFF)

                            # attempt to match the given desk name against the defined desks
                            query = {
                                'name':
                                re.compile(
                                    '^{}$'.format(mail_item.get('Desk', '')),
                                    re.IGNORECASE)
                            }
                            desk = superdesk.get_resource_service(
                                'desks').find_one(req=None, **query)
                            if desk:
                                item['task'] = {
                                    'desk': desk.get('_id'),
                                    'stage': desk.get('incoming_stage')
                                }

                            if 'Place' in mail_item:
                                locator_map = superdesk.get_resource_service(
                                    'vocabularies').find_one(req=None,
                                                             _id='locators')
                                place = [
                                    x for x in locator_map.get('items', [])
                                    if x['qcode'] == mail_item.get(
                                        'Place', '').upper()
                                ]
                                if place is not None:
                                    item['place'] = place

                            if mail_item.get('Legal flag', '') == 'LEGAL':
                                item['flags'] = {'marked_for_legal': True}

                            break

            return [item]
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)

コード例 #4

ファイルを表示

ファイル: rfc822.py プロジェクト: hlmnrmr/superdesk-core

    def _parse_formatted_email(self, data, provider):
        """Construct an item from an email that was constructed as a notification from a google form submission.

        The google form submits to a google sheet, this sheet creates the email as a notification

        :param data:
        :param provider:
        :return: A list of 1 item
        """
        try:
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()
            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    # Check that the subject line matches what we expect, ignore it if not
                    if self.parse_header(msg['subject']) != 'Formatted Editorial Story':
                        return []

                    item['guid'] = msg['Message-ID']
                    date_tuple = email.utils.parsedate_tz(msg['Date'])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(
                            email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone('utc'))
                        item['firstcreated'] = dt

                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            # if we don't know the charset just have a go!
                            if part.get_content_charset() is None:
                                json_str = body.decode().replace('\r\n', '').replace('  ', ' ')
                            else:
                                charset = part.get_content_charset()
                                json_str = body.decode(charset).replace('\r\n', '').replace('  ', ' ')

                            mail_item = dict((k, v[0]) for k, v in json.loads(json_str).items())

                            self._expand_category(item, mail_item)

                            item['original_source'] = mail_item.get('Username', '')
                            item['headline'] = mail_item.get('Headline', '')
                            item['abstract'] = mail_item.get('Abstract', '')
                            item['slugline'] = mail_item.get('Slugline', '')
                            item['body_html'] = '<p>' + mail_item.get('Body', '').replace('\n', '</p><p>') + '</p>'

                            default_source = app.config.get('DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES')
                            city = mail_item.get('Dateline', '')
                            cities = app.locators.find_cities()
                            located = [c for c in cities if c['city'].lower() == city.lower()]
                            item.setdefault('dateline', {})
                            item['dateline']['located'] = located[0] if len(located) > 0 else {'city_code': city,
                                                                                               'city': city,
                                                                                               'tz': 'UTC',
                                                                                               'dateline': 'city'}
                            item['dateline']['source'] = default_source
                            item['dateline']['text'] = format_dateline_to_locmmmddsrc(item['dateline']['located'],
                                                                                      get_date(item['firstcreated']),
                                                                                      source=default_source)

                            if mail_item.get('Priority') != '':
                                if mail_item.get('Priority', '3').isdigit():
                                    item['priority'] = int(mail_item.get('Priority', '3'))
                                else:
                                    priority_map = superdesk.get_resource_service('vocabularies').find_one(
                                        req=None, _id='priority')
                                    priorities = [x for x in priority_map.get('items', []) if
                                                  x['name'].upper() == mail_item.get('Priority', '').upper()]
                                    if priorities is not None and len(priorities) > 0:
                                        item['priority'] = int(priorities[0].get('qcode', '3'))
                                    else:
                                        item['priority'] = 3
                            if mail_item.get('News Value') != '':
                                item['urgency'] = int(mail_item.get('News Value', '3'))

                            # We expect the username passed corresponds to a superdesk user
                            query = {'email': re.compile('^{}$'.format(mail_item.get('Username')), re.IGNORECASE)}
                            user = superdesk.get_resource_service('users').find_one(req=None, **query)
                            if not user:
                                logger.error('Failed to find user for email {}'.format(mail_item.get('Username')))
                                raise UserNotRegisteredException()
                            item['original_creator'] = user.get('_id')
                            if BYLINE in user and user.get(BYLINE, ''):
                                item['byline'] = user.get(BYLINE)
                            item[SIGN_OFF] = user.get(SIGN_OFF)

                            # attempt to match the given desk name against the defined desks
                            query = {'name': re.compile('^{}$'.format(mail_item.get('Desk', '')), re.IGNORECASE)}
                            desk = superdesk.get_resource_service('desks').find_one(
                                req=None, **query)
                            if desk:
                                item['task'] = {'desk': desk.get('_id'), 'stage': desk.get('incoming_stage')}

                            if 'Place' in mail_item:
                                locator_map = superdesk.get_resource_service('vocabularies').find_one(req=None,
                                                                                                      _id='locators')
                                place = [x for x in locator_map.get('items', []) if
                                         x['qcode'] == mail_item.get('Place', '').upper()]
                                if place is not None:
                                    item['place'] = place

                            if mail_item.get('Legal flag', '') == 'LEGAL':
                                item['flags'] = {'marked_for_legal': True}

                            break

            return [item]
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)

コード例 #5

ファイルを表示

ファイル: rfc822.py プロジェクト: hlmnrmr/superdesk-core

    def parse(self, data, provider=None):
        config = provider.get('config', {})
        # If the channel is configured to process structured email generated from a google form
        if config.get('formatted', False):
            return self._parse_formatted_email(data, provider)
        try:
            new_items = []
            # create an item for the body text of the email
            # either text or html
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()

            comp_item = None

            # a list to keep the references to the attachments
            refs = []

            html_body = None
            text_body = None

            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    item['headline'] = self.parse_header(msg['subject'])
                    field_from = self.parse_header(msg['from'])
                    item['original_source'] = field_from
                    try:
                        if email_regex.findall(field_from):
                            email_address = email_regex.findall(field_from)[0]
                            user = get_resource_service('users').get_user_by_email(email_address)
                            item['original_creator'] = user[eve.utils.config.ID_FIELD]
                    except UserNotRegisteredException:
                        pass
                    item['guid'] = msg['Message-ID']
                    date_tuple = email.utils.parsedate_tz(msg['Date'])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(
                            email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone('utc'))
                        item['firstcreated'] = dt

                    # this will loop through all the available multiparts in mail
                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            try:
                                # if we don't know the charset just have a go!
                                if part.get_content_charset() is None:
                                    text_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    text_body = body.decode(charset)
                                continue
                            except Exception as ex:
                                logger.exception(
                                    "Exception parsing text body for {0} from {1}: {2}".format(item['headline'],
                                                                                               field_from, ex))
                                continue
                        if part.get_content_type() == "text/html":
                            body = part.get_payload(decode=True)
                            try:
                                if part.get_content_charset() is None:
                                    html_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    html_body = body.decode(charset)
                                html_body = self.safe_html(html_body)
                                continue
                            except Exception as ex:
                                logger.exception(
                                    "Exception parsing html body for {0} from {1}: {2}".format(item['headline'],
                                                                                               field_from, ex))
                                continue
                        if part.get_content_maintype() == 'multipart':
                            continue
                        if part.get('Content-Disposition') is None:
                            continue
                        # we are only going to pull off image attachments at this stage
                        if part.get_content_maintype() != 'image':
                            continue

                        fileName = part.get_filename()
                        if bool(fileName):
                            image = part.get_payload(decode=True)
                            content = io.BytesIO(image)
                            res = process_file_from_stream(content, part.get_content_type())
                            file_name, content_type, metadata = res
                            if content_type == 'image/gif' or content_type == 'image/png':
                                continue
                            content.seek(0)
                            image_id = self.parser_app.media.put(content, filename=fileName,
                                                                 content_type=content_type, metadata=metadata)
                            renditions = {'baseImage': {'href': image_id}}

                            # if we have not got a composite item then create one
                            if not comp_item:
                                comp_item = dict()
                                comp_item[ITEM_TYPE] = CONTENT_TYPE.COMPOSITE
                                comp_item['guid'] = generate_guid(type=GUID_TAG)
                                comp_item['versioncreated'] = utcnow()
                                comp_item['groups'] = []
                                comp_item['headline'] = item['headline']
                                comp_item['groups'] = []
                                comp_item['original_source'] = item['original_source']
                                if 'original_creator' in item:
                                    comp_item['original_creator'] = item['original_creator']

                                # create a reference to the item that stores the body of the email
                                item_ref = {'guid': item['guid'], 'residRef': item['guid'],
                                            'headline': item['headline'], 'location': 'ingest',
                                            'itemClass': 'icls:text', 'original_source': item['original_source']}
                                if 'original_creator' in item:
                                    item_ref['original_creator'] = item['original_creator']
                                refs.append(item_ref)

                            media_item = dict()
                            media_item['guid'] = generate_guid(type=GUID_TAG)
                            media_item['versioncreated'] = utcnow()
                            media_item[ITEM_TYPE] = CONTENT_TYPE.PICTURE
                            media_item['renditions'] = renditions
                            media_item['mimetype'] = content_type
                            set_filemeta(media_item, metadata)
                            media_item['slugline'] = fileName
                            if text_body is not None:
                                media_item['body_html'] = text_body
                            media_item['headline'] = item['headline']
                            media_item['original_source'] = item['original_source']
                            if 'original_creator' in item:
                                media_item['original_creator'] = item['original_creator']
                            new_items.append(media_item)

                            # add a reference to this item in the composite item
                            media_ref = {'guid': media_item['guid'], 'residRef': media_item['guid'],
                                         'headline': fileName, 'location': 'ingest', 'itemClass': 'icls:picture',
                                         'original_source': item['original_source']}
                            if 'original_creator' in item:
                                media_ref['original_creator'] = item['original_creator']
                            refs.append(media_ref)

            if html_body is not None:
                item['body_html'] = html_body
            else:
                item['body_html'] = '<pre>' + text_body + '</pre>'
                item[FORMAT] = FORMATS.PRESERVED

            # if there is composite item then add the main group and references
            if comp_item:
                grefs = {'refs': [{'idRef': 'main'}], 'id': 'root', 'role': 'grpRole:NEP'}
                comp_item['groups'].append(grefs)

                grefs = {'refs': refs, 'id': 'main', 'role': 'grpRole:Main'}
                comp_item['groups'].append(grefs)

                new_items.append(comp_item)

            new_items.append(item)
            return new_items
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)

コード例 #6

ファイルを表示

ファイル: rfc822.py プロジェクト: yukoff/superdesk

    def parse_email(self, data, provider):
        try:
            new_items = []
            # create an item for the body text of the email
            # either text or html
            item = dict()
            item['type'] = 'text'
            item['versioncreated'] = utcnow()

            comp_item = None

            # a list to keep the references to the attachments
            refs = []

            html_body = None
            text_body = None

            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    item['headline'] = self.parse_header(msg['subject'])
                    item['original_creator'] = self.parse_header(msg['from'])
                    item['guid'] = msg['Message-ID']
                    date_tuple = email.utils.parsedate_tz(msg['Date'])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone('utc'))
                        item['firstcreated'] = dt

                    # this will loop through all the available multiparts in mail
                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            try:
                                # if we don't know the charset just have a go!
                                if part.get_content_charset() is None:
                                    text_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    text_body = body.decode(charset)
                                continue
                            except Exception as ex:
                                logger.exception(
                                    "Exception parsing text body for {0} from {1}".format(item['headline'],
                                                                                          item['original_creator']), ex)
                                continue
                        if part.get_content_type() == "text/html":
                            body = part.get_payload(decode=True)
                            try:
                                if part.get_content_charset() is None:
                                    html_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    html_body = body.decode(charset)
                                html_body = self.safe_html(html_body)
                                continue
                            except Exception as ex:
                                logger.exception(
                                    "Exception parsing text html for {0} from {1}".format(item['headline'],
                                                                                          item['original_creator']), ex)
                                continue
                        if part.get_content_maintype() == 'multipart':
                            continue
                        if part.get('Content-Disposition') is None:
                            continue
                        # we are only going to pull off image attachments at this stage
                        if part.get_content_maintype() != 'image':
                            continue

                        fileName = part.get_filename()
                        if bool(fileName):
                            image = part.get_payload(decode=True)
                            content = io.BytesIO(image)
                            res = process_file_from_stream(content, part.get_content_type())
                            file_name, content_type, metadata = res
                            if content_type == 'image/gif' or content_type == 'image/png':
                                continue
                            content.seek(0)
                            image_id = self.parser_app.media.put(content, filename=fileName,
                                                                 content_type=content_type, metadata=metadata)
                            renditions = {'baseImage': {'href': image_id}}

                            # if we have not got a composite item then create one
                            if not comp_item:
                                comp_item = dict()
                                comp_item['type'] = 'composite'
                                comp_item['guid'] = generate_guid(type=GUID_TAG)
                                comp_item['versioncreated'] = utcnow()
                                comp_item['groups'] = []
                                comp_item['headline'] = item['headline']
                                comp_item['groups'] = []

                                # create a reference to the item that stores the body of the email
                                item_ref = {}
                                item_ref['guid'] = item['guid']
                                item_ref['residRef'] = item['guid']
                                item_ref['headline'] = item['headline']
                                item_ref['location'] = 'ingest'
                                item_ref['itemClass'] = 'icls:text'
                                refs.append(item_ref)

                            media_item = dict()
                            media_item['guid'] = generate_guid(type=GUID_TAG)
                            media_item['versioncreated'] = utcnow()
                            media_item['type'] = 'picture'
                            media_item['renditions'] = renditions
                            media_item['mimetype'] = content_type
                            media_item['filemeta'] = metadata
                            media_item['slugline'] = fileName
                            if text_body is not None:
                                media_item['body_html'] = text_body
                            media_item['headline'] = item['headline']
                            new_items.append(media_item)

                            # add a reference to this item in the composite item
                            media_ref = {}
                            media_ref['guid'] = media_item['guid']
                            media_ref['residRef'] = media_item['guid']
                            media_ref['headline'] = fileName
                            media_ref['location'] = 'ingest'
                            media_ref['itemClass'] = 'icls:picture'
                            refs.append(media_ref)

            if html_body is not None:
                item['body_html'] = html_body
            else:
                item['body_html'] = text_body
                item['type'] = 'preformatted'

            # if there is composite item then add the main group and references
            if comp_item:
                grefs = {}
                grefs['refs'] = [{'idRef': 'main'}]
                grefs['id'] = 'root'
                grefs['role'] = 'grpRole:NEP'
                comp_item['groups'].append(grefs)

                grefs = {}
                grefs['refs'] = refs
                grefs['id'] = 'main'
                grefs['role'] = 'grpRole:Main'
                comp_item['groups'].append(grefs)

                new_items.append(comp_item)

            new_items.append(item)
            return new_items
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)

コード例 #7

ファイルを表示

ファイル: rfc822.py プロジェクト: ancafarcas/superdesk-core

    def _parse_formatted_email(self, data, provider):
        """
        Passed an email that was constructed as a notificaton from a google form submission it constructs an item.
        The google form submits to a google sheet, this sheet creates the email as a notification
        :param data:
        :param provider:
        :return: A list of 1 item
        """
        try:
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()
            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    # Check that the subject line macthes what we expect, ignore it if not
                    if self.parse_header(msg['subject']) != 'Formatted Editorial Story':
                        return []

                    item['guid'] = msg['Message-ID']
                    date_tuple = email.utils.parsedate_tz(msg['Date'])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(
                            email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone('utc'))
                        item['firstcreated'] = dt

                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            # if we don't know the charset just have a go!
                            if part.get_content_charset() is None:
                                json_str = body.decode().replace('\r\n', '').replace('  ', ' ')
                            else:
                                charset = part.get_content_charset()
                                json_str = body.decode(charset).replace('\r\n', '').replace('  ', ' ')

                            mail_item = dict((k, v[0]) for k, v in json.loads(json_str).items())

                            self._expand_category(item, mail_item)

                            item['original_source'] = mail_item.get('Username')
                            item['headline'] = mail_item.get('Headline')
                            item['abstract'] = mail_item.get('Abstract')
                            item['slugline'] = mail_item.get('Slugline')
                            item['body_html'] = mail_item.get('Body').replace('\n', '<br />')

                            if mail_item.get('Priority') != '':
                                item['priority'] = int(mail_item.get('Priority'))
                            if mail_item.get('Urgency') != '':
                                item['urgency'] = int(mail_item.get('Urgency'))

                            # We expect the username passed coresponds to a superdesk user
                            query = {'email': re.compile('^{}$'.format(mail_item.get('Username')), re.IGNORECASE)}
                            user = superdesk.get_resource_service('users').find_one(req=None, **query)
                            if not user:
                                logger.error('Failed to find user for email {}'.format(mail_item.get('Username')))
                                raise UserNotRegisteredException()
                            item['original_creator'] = user.get('_id')
                            item['byline'] = user.get(BYLINE, user.get('display_name'))
                            item[SIGN_OFF] = user.get(SIGN_OFF)

                            # attempt to match the given desk name against the defined desks
                            desk = superdesk.get_resource_service('desks').find_one(
                                req=None, name=mail_item.get('Desk'))
                            if desk:
                                item['task'] = {'desk': desk.get('_id'), 'stage': desk.get('incoming_stage')}
                            break
            return [item]
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)

コード例 #8

ファイルを表示

    def _parse_formatted_email(self, data, provider):
        """
        Passed an email that was constructed as a notification from a google form submission it constructs an item.
        The google form submits to a google sheet, this sheet creates the email as a notification
        :param data:
        :param provider:
        :return: A list of 1 item
        """
        try:
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()
            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    # Check that the subject line matches what we expect, ignore it if not
                    if self.parse_header(
                            msg['subject']) != 'Formatted Editorial Story':
                        return []

                    item['guid'] = msg['Message-ID']
                    date_tuple = email.utils.parsedate_tz(msg['Date'])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(
                            email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone('utc'))
                        item['firstcreated'] = dt

                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            # if we don't know the charset just have a go!
                            if part.get_content_charset() is None:
                                json_str = body.decode().replace('\r\n',
                                                                 '').replace(
                                                                     '  ', ' ')
                            else:
                                charset = part.get_content_charset()
                                json_str = body.decode(charset).replace(
                                    '\r\n', '').replace('  ', ' ')

                            mail_item = dict(
                                (k, v[0])
                                for k, v in json.loads(json_str).items())

                            self._expand_category(item, mail_item)

                            item['original_source'] = mail_item.get('Username')
                            item['headline'] = mail_item.get('Headline')
                            item['abstract'] = mail_item.get('Abstract')
                            item['slugline'] = mail_item.get('Slugline')
                            item['body_html'] = mail_item.get('Body').replace(
                                '\n', '<br />')

                            if mail_item.get('Priority') != '':
                                item['priority'] = int(
                                    mail_item.get('Priority'))
                            if mail_item.get('Urgency') != '':
                                item['urgency'] = int(mail_item.get('Urgency'))

                            # We expect the username passed corresponds to a superdesk user
                            query = {
                                'email':
                                re.compile(
                                    '^{}$'.format(mail_item.get('Username')),
                                    re.IGNORECASE)
                            }
                            user = superdesk.get_resource_service(
                                'users').find_one(req=None, **query)
                            if not user:
                                logger.error(
                                    'Failed to find user for email {}'.format(
                                        mail_item.get('Username')))
                                raise UserNotRegisteredException()
                            item['original_creator'] = user.get('_id')
                            item['byline'] = user.get(BYLINE,
                                                      user.get('display_name'))
                            item[SIGN_OFF] = user.get(SIGN_OFF)

                            # attempt to match the given desk name against the defined desks
                            desk = superdesk.get_resource_service(
                                'desks').find_one(req=None,
                                                  name=mail_item.get('Desk'))
                            if desk:
                                item['task'] = {
                                    'desk': desk.get('_id'),
                                    'stage': desk.get('incoming_stage')
                                }
                            break
            return [item]
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)

コード例 #9

ファイルを表示

    def parse(self, data, provider=None):
        config = provider.get("config", {})
        # If the channel is configured to process structured email generated from a google form
        if config.get("formatted", False):
            return self._parse_formatted_email(data, provider)
        try:
            new_items = []
            # create an item for the body text of the email
            # either text or html
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item["versioncreated"] = utcnow()

            comp_item = None

            # a list to keep the references to the attachments
            refs = []

            html_body = None
            text_body = None

            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    item["headline"] = self.parse_header(msg["subject"])
                    field_from = self.parse_header(msg["from"])
                    item["original_source"] = field_from
                    try:
                        if email_regex.findall(field_from):
                            email_address = email_regex.findall(field_from)[0]
                            user = get_resource_service(
                                "users").get_user_by_email(email_address)
                            item["original_creator"] = user[
                                eve.utils.config.ID_FIELD]
                    except UserNotRegisteredException:
                        pass
                    item["guid"] = msg["Message-ID"]
                    date_tuple = email.utils.parsedate_tz(msg["Date"])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(
                            email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone("utc"))
                        item["firstcreated"] = dt

                    # this will loop through all the available multiparts in mail
                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            try:
                                # if we don't know the charset just have a go!
                                if part.get_content_charset() is None:
                                    text_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    text_body = body.decode(charset)
                                continue
                            except Exception as ex:
                                logger.exception(
                                    "Exception parsing text body for {0} from {1}: {2}"
                                    .format(item["headline"], field_from, ex))
                                continue
                        if part.get_content_type() == "text/html":
                            body = part.get_payload(decode=True)
                            try:
                                if part.get_content_charset() is None:
                                    html_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    html_body = body.decode(charset)
                                html_body = sanitize_html(html_body)
                                continue
                            except Exception as ex:
                                logger.exception(
                                    "Exception parsing html body for {0} from {1}: {2}"
                                    .format(item["headline"], field_from, ex))
                                continue
                        if part.get_content_maintype() == "multipart":
                            continue
                        if part.get("Content-Disposition") is None:
                            continue
                        # we are only going to pull off image attachments at this stage
                        if part.get_content_maintype() != "image":
                            continue

                        fileName = part.get_filename()
                        if bool(fileName):
                            image = part.get_payload(decode=True)
                            content = io.BytesIO(image)
                            res = process_file_from_stream(
                                content, part.get_content_type())
                            file_name, content_type, metadata = res
                            if content_type == "image/gif" or content_type == "image/png":
                                continue
                            content.seek(0)
                            image_id = self.parser_app.media.put(
                                content,
                                filename=fileName,
                                content_type=content_type,
                                metadata=metadata)
                            renditions = {"baseImage": {"href": image_id}}

                            # if we have not got a composite item then create one
                            if not comp_item:
                                comp_item = dict()
                                comp_item[ITEM_TYPE] = CONTENT_TYPE.COMPOSITE
                                comp_item["guid"] = generate_guid(
                                    type=GUID_TAG)
                                comp_item["versioncreated"] = utcnow()
                                comp_item["groups"] = []
                                comp_item["headline"] = item["headline"]
                                comp_item["groups"] = []
                                comp_item["original_source"] = item[
                                    "original_source"]
                                if "original_creator" in item:
                                    comp_item["original_creator"] = item[
                                        "original_creator"]

                                # create a reference to the item that stores the body of the email
                                item_ref = {
                                    "guid": item["guid"],
                                    "residRef": item["guid"],
                                    "headline": item["headline"],
                                    "location": "ingest",
                                    "itemClass": "icls:text",
                                    "original_source": item["original_source"],
                                }
                                if "original_creator" in item:
                                    item_ref["original_creator"] = item[
                                        "original_creator"]
                                refs.append(item_ref)

                            media_item = dict()
                            media_item["guid"] = generate_guid(type=GUID_TAG)
                            media_item["versioncreated"] = utcnow()
                            media_item[ITEM_TYPE] = CONTENT_TYPE.PICTURE
                            media_item["renditions"] = renditions
                            media_item["mimetype"] = content_type
                            set_filemeta(media_item, metadata)
                            media_item["slugline"] = fileName
                            if text_body is not None:
                                media_item["body_html"] = text_body
                            media_item["headline"] = item["headline"]
                            media_item["original_source"] = item[
                                "original_source"]
                            if "original_creator" in item:
                                media_item["original_creator"] = item[
                                    "original_creator"]
                            new_items.append(media_item)

                            # add a reference to this item in the composite item
                            media_ref = {
                                "guid": media_item["guid"],
                                "residRef": media_item["guid"],
                                "headline": fileName,
                                "location": "ingest",
                                "itemClass": "icls:picture",
                                "original_source": item["original_source"],
                            }
                            if "original_creator" in item:
                                media_ref["original_creator"] = item[
                                    "original_creator"]
                            refs.append(media_ref)

            if html_body:
                item["body_html"] = html_body
            else:
                item["body_html"] = "<pre>" + text_body + "</pre>"
                item[FORMAT] = FORMATS.PRESERVED

            # if there is composite item then add the main group and references
            if comp_item:
                grefs = {
                    "refs": [{
                        "idRef": "main"
                    }],
                    "id": "root",
                    "role": "grpRole:NEP"
                }
                comp_item["groups"].append(grefs)

                grefs = {"refs": refs, "id": "main", "role": "grpRole:Main"}
                comp_item["groups"].append(grefs)

                new_items.append(comp_item)

            new_items.append(item)
            return new_items
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)

コード例 #10

ファイルを表示

    def _parse_formatted_email(self, data, provider):
        """Construct an item from an email that was constructed as a notification from a google form submission.

        The google form submits to a google sheet, this sheet creates the email as a notification

        :param data:
        :param provider:
        :return: A list of 1 item
        """
        try:
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item["versioncreated"] = utcnow()
            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    # Check that the subject line matches what we expect, ignore it if not
                    if self.parse_header(
                            msg["subject"]) != "Formatted Editorial Story":
                        return []

                    item["guid"] = msg["Message-ID"]
                    date_tuple = email.utils.parsedate_tz(msg["Date"])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(
                            email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone("utc"))
                        item["firstcreated"] = dt

                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            # if we don't know the charset just have a go!
                            if part.get_content_charset() is None:
                                json_str = body.decode().replace("\r\n",
                                                                 "").replace(
                                                                     "  ", " ")
                            else:
                                charset = part.get_content_charset()
                                json_str = body.decode(charset).replace(
                                    "\r\n", "").replace("  ", " ")

                            mail_item = dict(
                                (k, v[0])
                                for k, v in json.loads(json_str).items())

                            self._expand_category(item, mail_item)

                            item["original_source"] = mail_item.get(
                                "Username", mail_item.get("Email Address", ""))
                            item["headline"] = mail_item.get("Headline", "")
                            item["abstract"] = mail_item.get("Abstract", "")
                            item["slugline"] = mail_item.get("Slugline", "")
                            item["body_html"] = "<p>" + mail_item.get(
                                "Body", "").replace("\n", "</p><p>") + "</p>"

                            default_source = app.config.get(
                                "DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES")
                            city = mail_item.get("Dateline", "")
                            cities = app.locators.find_cities()
                            located = [
                                c for c in cities
                                if c["city"].lower() == city.lower()
                            ]
                            item.setdefault("dateline", {})
                            item["dateline"]["located"] = (
                                located[0] if len(located) > 0 else {
                                    "city_code": city,
                                    "city": city,
                                    "tz": "UTC",
                                    "dateline": "city"
                                })
                            item["dateline"]["source"] = default_source
                            item["dateline"][
                                "text"] = format_dateline_to_locmmmddsrc(
                                    item["dateline"]["located"],
                                    get_date(item["firstcreated"]),
                                    source=default_source)

                            if mail_item.get("Priority") != "":
                                if mail_item.get("Priority", "3").isdigit():
                                    item["priority"] = int(
                                        mail_item.get("Priority", "3"))
                                else:
                                    priority_map = superdesk.get_resource_service(
                                        "vocabularies").find_one(
                                            req=None, _id="priority")
                                    priorities = [
                                        x
                                        for x in priority_map.get("items", [])
                                        if x["name"].upper() == mail_item.get(
                                            "Priority", "").upper()
                                    ]
                                    if priorities is not None and len(
                                            priorities) > 0:
                                        item["priority"] = int(
                                            priorities[0].get("qcode", "3"))
                                    else:
                                        item["priority"] = 3
                            if mail_item.get("News Value") != "":
                                item["urgency"] = int(
                                    mail_item.get("News Value", "3"))

                            # We expect the username passed corresponds to a superdesk user
                            query = {
                                "email":
                                re.compile(
                                    "^{}$".format(
                                        mail_item.get(
                                            "Username",
                                            mail_item.get("Email Address",
                                                          ""))),
                                    re.IGNORECASE,
                                )
                            }
                            user = superdesk.get_resource_service(
                                "users").find_one(req=None, **query)
                            if not user:
                                logger.error(
                                    "Failed to find user for email {}".format(
                                        mail_item.get(
                                            "Username",
                                            mail_item.get("Email Address",
                                                          ""))))
                                raise UserNotRegisteredException()
                            item["original_creator"] = user.get("_id")
                            if BYLINE in user and user.get(BYLINE, ""):
                                item["byline"] = user.get(BYLINE)
                            item[SIGN_OFF] = user.get(SIGN_OFF)

                            # attempt to match the given desk name against the defined desks
                            query = {
                                "name":
                                re.compile(
                                    "^{}$".format(mail_item.get("Desk", "")),
                                    re.IGNORECASE)
                            }
                            desk = superdesk.get_resource_service(
                                "desks").find_one(req=None, **query)
                            if desk:
                                item["task"] = {
                                    "desk": desk.get("_id"),
                                    "stage": desk.get("incoming_stage")
                                }

                            if "Place" in mail_item:
                                locator_map = superdesk.get_resource_service(
                                    "vocabularies").find_one(req=None,
                                                             _id="locators")
                                place = [
                                    x for x in locator_map.get("items", [])
                                    if x["qcode"] == mail_item.get(
                                        "Place", "").upper()
                                ]
                                if place is not None:
                                    item["place"] = place

                            if mail_item.get("Legal flag", "") == "LEGAL":
                                item["flags"] = {"marked_for_legal": True}

                            break

            return [item]
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)