Esempio n. 1
0
    def _update(self, provider, update, test=False):
        """Fetch the messages matching the provider's filter and parse them.

        The connection comes from ``self.authenticate``. The configured
        mailbox is selected, searched, and every hit is fetched as a raw
        RFC822 message, handed to the provider's feed parser and then
        explicitly flagged as seen.

        :param provider: ingest provider dict; its ``config`` entry supplies
            ``mailbox`` and ``filter``.
        :param update: not used by this method.
        :param test: when true the messages are still fetched but neither
            parsed nor flagged, so the returned list stays empty.
        :return: list with one ``parser.parse`` result per ingested message.
        :raises IngestEmailError: when selecting or searching the mailbox
            does not answer ``OK``; any other exception is wrapped through
            ``IngestEmailError.emailError``.
        """
        config = provider.get("config", {})
        new_items = []

        try:
            imap = self.authenticate(provider, config)

            try:
                rv, _ = imap.select(config.get("mailbox", None),
                                    readonly=False)
                if rv != "OK":
                    raise IngestEmailError.emailMailboxError()
                try:
                    # imaplib's search needs at least one criterion, so a
                    # missing *or empty* filter falls back to unseen mail.
                    search_filter = config.get("filter") or "(UNSEEN)"
                    rv, found = imap.search(None, search_filter)
                    if rv != "OK":
                        raise IngestEmailError.emailFilterError()
                    for num in found[0].split():
                        rv, message = imap.fetch(num, "(RFC822)")
                        if rv == "OK" and not test:
                            try:
                                parser = self.get_feed_parser(provider,
                                                              message)
                                new_items.append(
                                    parser.parse(message, provider))
                                imap.store(num, "+FLAGS", "\\Seen")
                            except IngestEmailError:
                                # Skip messages the parser rejects; the
                                # store call above is then not reached.
                                continue
                finally:
                    # Only reached after a successful select.
                    imap.close()
            finally:
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items
Esempio n. 2
0
    def _update(self, provider):
        """Read the configured mailbox and return the parsed messages.

        Opens an IMAP-over-SSL connection to ``config['server']`` /
        ``config['port']`` (default 993), logs in, selects the mailbox,
        searches it and parses every message that could be fetched. Parsed
        messages are explicitly flagged as seen.

        :param provider: ingest provider dict; connection settings are read
            from its ``config`` entry.
        :return: list with one ``parser.parse`` result per parsed message;
            empty when the mailbox cannot be selected or the search does not
            answer ``OK``.
        :raises IngestEmailError: on login failure (``emailLoginError``); any
            other exception is wrapped through ``emailError``.
        """
        config = provider.get('config', {})
        server = config.get('server', '')
        port = int(config.get('port', 993))
        # Bound up front so the final return is valid even when select or
        # search do not answer 'OK'.
        new_items = []

        try:
            imap = imaplib.IMAP4_SSL(host=server, port=port)
            try:
                try:
                    imap.login(config.get('user', None), config.get('password', None))
                except imaplib.IMAP4.error as ex:
                    # Pass the caught instance so the server's message is kept.
                    raise IngestEmailError.emailLoginError(ex, provider)

                rv, _ = imap.select(config.get('mailbox', None), readonly=False)
                if rv == 'OK':
                    try:
                        # imaplib's search needs at least one criterion, so
                        # an empty filter falls back to unseen mail.
                        rv, found = imap.search(None, config.get('filter') or '(UNSEEN)')
                        if rv == 'OK':
                            for num in found[0].split():
                                rv, message = imap.fetch(num, '(RFC822)')
                                if rv != 'OK':
                                    continue
                                try:
                                    parser = self.get_feed_parser(provider, message)
                                    new_items.append(parser.parse(message, provider))
                                    imap.store(num, '+FLAGS', '\\Seen')
                                except IngestEmailError:
                                    # Skip messages the parser rejects.
                                    continue
                    finally:
                        # Close the selected mailbox even if parsing blew up.
                        imap.close()
            finally:
                # Always end the session, including after a failed login.
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items
Esempio n. 3
0
    def _update(self, provider, update, test=False):
        """Ingest the messages matching the provider's filter.

        Connects to ``config['server']`` / ``config['port']`` (default 993)
        over SSL, logs in, selects the mailbox and searches it. Every hit is
        fetched as a raw RFC822 message, parsed with the provider's feed
        parser and explicitly flagged as seen. When ``config['attachment']``
        is truthy, ``self.save_attachment`` is called with the raw message
        and the parsed item.

        :param provider: ingest provider dict; settings come from ``config``.
        :param update: not used by this method.
        :param test: when true the messages are still fetched but neither
            parsed nor flagged, so the returned list stays empty.
        :return: list with one parsed item per ingested message.
        :raises IngestEmailError: host, login, mailbox and filter failures
            use their dedicated errors; any other exception is wrapped
            through ``emailError``.
        """
        config = provider.get('config', {})
        server = config.get('server', '')
        port = int(config.get('port', 993))
        new_items = []

        try:
            try:
                # NOTE: this sets the default timeout for every socket
                # created afterwards in this process, not only this one.
                socket.setdefaulttimeout(app.config.get('EMAIL_TIMEOUT', 10))
                imap = imaplib.IMAP4_SSL(host=server, port=port)
            except (socket.gaierror, OSError) as e:
                raise IngestEmailError.emailHostError(exception=e,
                                                      provider=provider)

            try:
                try:
                    imap.login(config.get('user', None),
                               config.get('password', None))
                except imaplib.IMAP4.error as e:
                    # Pass the caught instance so the server's message is kept.
                    raise IngestEmailError.emailLoginError(e, provider)

                rv, _ = imap.select(config.get('mailbox', None),
                                    readonly=False)
                if rv != 'OK':
                    raise IngestEmailError.emailMailboxError()
                try:
                    # imaplib's search needs at least one criterion, so an
                    # empty filter falls back to unseen mail.
                    rv, found = imap.search(None,
                                            config.get('filter') or '(UNSEEN)')
                    if rv != 'OK':
                        raise IngestEmailError.emailFilterError()
                    for num in found[0].split():
                        rv, message = imap.fetch(num, '(RFC822)')
                        if rv == 'OK' and not test:
                            try:
                                parser = self.get_feed_parser(provider,
                                                              message)
                                item = parser.parse(message, provider)
                                if config.get('attachment'):
                                    self.save_attachment(message, item)
                                new_items.append(item)
                                imap.store(num, '+FLAGS', '\\Seen')
                            except IngestEmailError:
                                # Skip messages the parser rejects.
                                continue
                finally:
                    # Only reached after a successful select.
                    imap.close()
            finally:
                # Also ends the session when login or select failed.
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items
Esempio n. 4
0
class EmailFeedingService(FeedingService):
    """
    Feeding Service class which can read the article(s) from a configured mail box.
    """

    NAME = 'email'
    ERRORS = [
        IngestEmailError.emailError().get_error_description(),
        IngestEmailError.emailLoginError().get_error_description()
    ]

    label = 'Email'

    def _update(self, provider, update):
        """Read the configured mailbox and return the parsed messages.

        Opens an IMAP-over-SSL connection to ``config['server']`` /
        ``config['port']`` (default 993), logs in, selects the mailbox,
        searches it and parses every message that could be fetched. Parsed
        messages are explicitly flagged as seen.

        :param provider: ingest provider dict; settings come from ``config``.
        :param update: not used by this method.
        :return: list with one ``parser.parse`` result per parsed message;
            empty when the mailbox cannot be selected or the search does not
            answer ``OK``.
        :raises IngestEmailError: on login failure (``emailLoginError``); any
            other exception is wrapped through ``emailError``.
        """
        config = provider.get('config', {})
        server = config.get('server', '')
        port = int(config.get('port', 993))
        # Bound up front so the final return is valid even when select or
        # search do not answer 'OK'.
        new_items = []

        try:
            imap = imaplib.IMAP4_SSL(host=server, port=port)
            try:
                try:
                    imap.login(config.get('user', None),
                               config.get('password', None))
                except imaplib.IMAP4.error as ex:
                    # Pass the caught instance so the server's message is kept.
                    raise IngestEmailError.emailLoginError(ex, provider)

                rv, _ = imap.select(config.get('mailbox', None), readonly=False)
                if rv == 'OK':
                    try:
                        # imaplib's search needs at least one criterion, so
                        # an empty filter falls back to unseen mail.
                        rv, found = imap.search(None, config.get('filter') or '(UNSEEN)')
                        if rv == 'OK':
                            for num in found[0].split():
                                rv, message = imap.fetch(num, '(RFC822)')
                                if rv != 'OK':
                                    continue
                                try:
                                    parser = self.get_feed_parser(provider, message)
                                    new_items.append(parser.parse(message, provider))
                                    imap.store(num, '+FLAGS', '\\Seen')
                                except IngestEmailError:
                                    # Skip messages the parser rejects.
                                    continue
                    finally:
                        # Close the selected mailbox even if parsing blew up.
                        imap.close()
            finally:
                # Always end the session, including after a failed login.
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items

    def prepare_href(self, href, mimetype=None):
        """Return the media URL for ``href`` as built by ``url_for_media``."""
        return url_for_media(href, mimetype)
Esempio n. 5
0
    def authenticate(self, provider: dict, config: dict) -> imaplib.IMAP4_SSL:
        """Open an IMAP-over-SSL connection and log in with user/password.

        :param provider: ingest provider dict, forwarded to the raised errors.
        :param config: provider configuration; ``server``, ``port`` (default
            993), ``user`` and ``password`` are read from it.
        :return: the logged-in connection; the caller is responsible for
            logging out.
        :raises IngestEmailError: ``emailHostError`` when the connection
            cannot be established, ``emailLoginError`` when login is refused.
        """
        server = config.get("server", "")
        port = int(config.get("port", 993))
        try:
            # NOTE: this sets the default timeout for every socket created
            # afterwards in this process, not only this connection.
            socket.setdefaulttimeout(app.config.get("EMAIL_TIMEOUT", 10))
            imap = imaplib.IMAP4_SSL(host=server, port=port)
        except (socket.gaierror, OSError) as e:
            raise IngestEmailError.emailHostError(exception=e,
                                                  provider=provider)

        try:
            imap.login(config.get("user", None), config.get("password", None))
        except imaplib.IMAP4.error as e:
            # The caller never receives this connection, so release its
            # socket here; a failure while closing must not hide the login
            # error.
            try:
                imap.shutdown()
            except OSError:
                pass
            # Pass the caught instance so the server's message is kept.
            raise IngestEmailError.emailLoginError(e, provider)

        return imap
Esempio n. 6
0
    def _update(self, provider, update, test=False):
        """Ingest the messages matching the provider's filter.

        Connects to ``config['server']`` / ``config['port']`` (default 993)
        over SSL, logs in, selects the mailbox and searches it. Every hit is
        fetched as a raw RFC822 message, parsed with the provider's feed
        parser and explicitly flagged as seen.

        :param provider: ingest provider dict; settings come from ``config``.
        :param update: not used by this method.
        :param test: when true the messages are still fetched but neither
            parsed nor flagged, so the returned list stays empty.
        :return: list with one ``parser.parse`` result per ingested message.
        :raises IngestEmailError: host, login, mailbox and filter failures
            use their dedicated errors; any other exception is wrapped
            through ``emailError``.
        """
        config = provider.get('config', {})
        server = config.get('server', '')
        port = int(config.get('port', 993))
        new_items = []

        try:
            try:
                # NOTE: this sets the default timeout for every socket
                # created afterwards in this process, not only this one.
                socket.setdefaulttimeout(app.config.get('EMAIL_TIMEOUT', 10))
                imap = imaplib.IMAP4_SSL(host=server, port=port)
            except (socket.gaierror, OSError) as e:
                raise IngestEmailError.emailHostError(exception=e, provider=provider)

            try:
                try:
                    imap.login(config.get('user', None), config.get('password', None))
                except imaplib.IMAP4.error as e:
                    # Pass the caught instance so the server's message is kept.
                    raise IngestEmailError.emailLoginError(e, provider)

                rv, _ = imap.select(config.get('mailbox', None), readonly=False)
                if rv != 'OK':
                    raise IngestEmailError.emailMailboxError()
                try:
                    # imaplib's search needs at least one criterion, so an
                    # empty filter falls back to unseen mail.
                    rv, found = imap.search(None, config.get('filter') or '(UNSEEN)')
                    if rv != 'OK':
                        raise IngestEmailError.emailFilterError()
                    for num in found[0].split():
                        rv, message = imap.fetch(num, '(RFC822)')
                        if rv == 'OK' and not test:
                            try:
                                parser = self.get_feed_parser(provider, message)
                                new_items.append(parser.parse(message, provider))
                                imap.store(num, '+FLAGS', '\\Seen')
                            except IngestEmailError:
                                # Skip messages the parser rejects.
                                continue
                finally:
                    # Only reached after a successful select.
                    imap.close()
            finally:
                # Also ends the session when login or select failed.
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items
Esempio n. 7
0
class EmailReaderService(IngestService):
    """Ingest service that reads messages from an IMAP mailbox over SSL."""

    PROVIDER = 'email'

    ERRORS = [IngestEmailError.emailError().get_error_description(),
              IngestEmailError.emailLoginError().get_error_description()]

    def __init__(self):
        # Every fetched message is parsed with this single parser instance.
        self.parser = rfc822Parser()

    def _update(self, provider):
        """Read the configured mailbox and return the parsed messages.

        Opens an IMAP-over-SSL connection to ``config['server']`` /
        ``config['port']`` (default 993), logs in, selects the mailbox,
        searches it with ``config['filter']`` and parses every message that
        could be fetched.

        :param provider: ingest provider dict; settings come from ``config``.
        :return: list with one ``parse_email`` result per parsed message;
            empty when the mailbox cannot be selected or the search does not
            answer ``OK``.
        :raises IngestEmailError: on login failure (``emailLoginError``); any
            other exception is wrapped through ``emailError``.
        """
        config = provider.get('config', {})
        server = config.get('server', '')
        port = int(config.get('port', 993))
        # Bound up front so the final return is valid even when select or
        # search do not answer 'OK'.
        new_items = []

        try:
            imap = imaplib.IMAP4_SSL(host=server, port=port)
            try:
                try:
                    imap.login(config.get('user', None), config.get('password', None))
                except imaplib.IMAP4.error as ex:
                    # Pass the caught instance so the server's message is kept.
                    raise IngestEmailError.emailLoginError(ex, provider)

                rv, _ = imap.select(config.get('mailbox', None), readonly=False)
                if rv == 'OK':
                    try:
                        # NOTE(review): with no filter configured, None is
                        # passed as the only search criterion - confirm the
                        # intended default with the provider configuration.
                        rv, found = imap.search(None, config.get('filter', None))
                        if rv == 'OK':
                            for num in found[0].split():
                                rv, message = imap.fetch(num, '(RFC822)')
                                if rv != 'OK':
                                    continue
                                try:
                                    new_items.append(self.parser.parse_email(message, provider))
                                except IngestEmailError:
                                    # Skip messages the parser rejects.
                                    continue
                    finally:
                        # Close the selected mailbox even if parsing blew up.
                        imap.close()
            finally:
                # Always end the session, including after a failed login.
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items

    def prepare_href(self, href):
        """Return the media URL for ``href`` as built by ``url_for_media``."""
        return url_for_media(href)
Esempio n. 8
0
    def _update(self, provider, update, test=False):
        """Ingest the messages matching the provider's filter.

        The connection is obtained from ``self.authenticate``. Each message
        found in the configured mailbox is fetched as raw RFC822 data, parsed
        by the provider's feed parser, passed to ``self.parse_extra`` and then
        explicitly flagged as seen.

        :param provider: ingest provider dict; ``mailbox`` and ``filter`` are
            read from its ``config`` entry.
        :param update: not used by this method.
        :param test: when true the messages are still fetched but neither
            parsed nor flagged, so the returned list stays empty.
        :return: list with one parse result per ingested message.
        :raises IngestEmailError: mailbox and filter failures use their
            dedicated errors; any other exception is wrapped through
            ``emailError``.
        """
        config = provider.get("config", {})
        ingested = []

        try:
            imap = self.authenticate(provider, config)
            try:
                status, _ = imap.select(config.get("mailbox", None), readonly=False)
                if status != "OK":
                    raise IngestEmailError.emailMailboxError()
                try:
                    # A search needs at least one criterion, hence the
                    # fallback for a missing or empty filter.
                    criteria = config.get("filter") or "(UNSEEN)"
                    status, hits = imap.search(None, criteria)
                    if status != "OK":
                        raise IngestEmailError.emailFilterError()
                    for msg_num in hits[0].split():
                        status, raw = imap.fetch(msg_num, "(RFC822)")
                        if test or status != "OK":
                            continue
                        try:
                            feed_parser = self.get_feed_parser(provider, raw)
                            parsed = feed_parser.parse(raw, provider)
                            self.parse_extra(imap, msg_num, parsed)
                            ingested.append(parsed)
                            imap.store(msg_num, "+FLAGS", "\\Seen")
                        except IngestEmailError:
                            # Rejected message: move on to the next one.
                            pass
                finally:
                    imap.close()
            finally:
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return ingested
Esempio n. 9
0
    def authenticate(self, provider: dict, config: dict) -> imaplib.IMAP4_SSL:
        """Open a Gmail IMAP connection authenticated with a stored OAuth2 token.

        The token is looked up in the ``oauth2_token`` resource service by
        ``provider["url_id"]`` and refreshed first when it expires within the
        next 600 seconds.

        :param provider: ingest provider dict; ``url_id`` and ``name`` are read.
        :param config: not used by this method.
        :return: the connection after XOAUTH2 authentication.
        :raises IngestEmailError: ``notConfiguredError`` when no token is
            stored for this provider.
        """
        token_service = superdesk.get_resource_service("oauth2_token")
        token = token_service.find_one(req=None, _id=provider["url_id"])
        if token is None:
            not_logged_in = ValueError(l_("You need to log in first"))
            raise IngestEmailError.notConfiguredError(not_logged_in,
                                                      provider=provider)

        imap = imaplib.IMAP4_SSL("imap.gmail.com")

        expires_at = token["expires_at"].timestamp()
        if expires_at < time.time() + 600:
            refresh_msg = "Refreshing token for {provider_name}".format(
                provider_name=provider["name"])
            logger.info(refresh_msg)
            token = oauth.refresh_google_token(token["_id"])

        sasl_string = "user={email}\x01auth=Bearer {token}\x01\x01".format(
            email=token["email"], token=token["access_token"])

        def xoauth2_response(_challenge):
            # The server challenge is ignored; the same response is always sent.
            return sasl_string.encode()

        imap.authenticate("XOAUTH2", xoauth2_response)
        return imap
Esempio n. 10
0
    def parse_email(self, data, provider):
        """Turn a fetched email into a list of ingest items.

        Every tuple found in ``data`` is read as ``(_, raw_message_bytes)``
        and parsed with the ``email`` package. The result contains, in order:

        * one ``picture`` item per attachment whose MIME main type is
          ``image`` and that has a filename, except those whose content type
          (as reported by ``process_file_from_stream``) is GIF or PNG;
        * one ``composite`` item referencing the body item and the pictures,
          only when at least one picture was stored;
        * the body item: ``text`` with the sanitised HTML part when there is
          one, otherwise ``preformatted`` with the plain-text part.

        :param data: response data of an IMAP fetch.
        :param provider: ingest provider dict, forwarded to the raised error.
        :return: list of item dicts as described above.
        :raises IngestEmailError: any exception is wrapped through
            ``emailParseError``.
        """
        try:
            new_items = []
            # create an item for the body text of the email
            # either text or html
            item = dict()
            item['type'] = 'text'
            item['versioncreated'] = utcnow()

            comp_item = None

            # a list to keep the references to the attachments
            refs = []

            html_body = None
            text_body = None

            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    item['headline'] = self.parse_header(msg['subject'])
                    item['original_creator'] = self.parse_header(msg['from'])
                    item['guid'] = msg['Message-ID']
                    date_tuple = email.utils.parsedate_tz(msg['Date'])
                    if date_tuple:
                        dt = datetime.datetime.utcfromtimestamp(
                            email.utils.mktime_tz(date_tuple))
                        dt = dt.replace(tzinfo=timezone('utc'))
                        item['firstcreated'] = dt

                    # this will loop through all the available multiparts in mail
                    for part in msg.walk():
                        if part.get_content_type() == "text/plain":
                            body = part.get_payload(decode=True)
                            try:
                                # if we don't know the charset just have a go!
                                if part.get_content_charset() is None:
                                    text_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    text_body = body.decode(charset)
                                continue
                            except Exception:
                                # logger.exception already records the active
                                # exception; passing it as an extra argument
                                # would break the message formatting.
                                logger.exception(
                                    "Exception parsing text body for {0} from {1}"
                                    .format(item['headline'],
                                            item['original_creator']))
                                continue
                        if part.get_content_type() == "text/html":
                            body = part.get_payload(decode=True)
                            try:
                                if part.get_content_charset() is None:
                                    html_body = body.decode()
                                else:
                                    charset = part.get_content_charset()
                                    html_body = body.decode(charset)
                                html_body = self.safe_html(html_body)
                                continue
                            except Exception:
                                logger.exception(
                                    "Exception parsing text html for {0} from {1}"
                                    .format(item['headline'],
                                            item['original_creator']))
                                continue
                        if part.get_content_maintype() == 'multipart':
                            continue
                        if part.get('Content-Disposition') is None:
                            continue
                        # we are only going to pull off image attachments at this stage
                        if part.get_content_maintype() != 'image':
                            continue

                        fileName = part.get_filename()
                        if bool(fileName):
                            image = part.get_payload(decode=True)
                            content = io.BytesIO(image)
                            res = process_file_from_stream(
                                content, part.get_content_type())
                            file_name, content_type, metadata = res
                            if content_type == 'image/gif' or content_type == 'image/png':
                                continue
                            # rewind before the stream is handed to storage
                            content.seek(0)
                            image_id = self.parser_app.media.put(
                                content,
                                filename=fileName,
                                content_type=content_type,
                                metadata=metadata)
                            renditions = {'baseImage': {'href': image_id}}

                            # if we have not got a composite item then create one
                            if not comp_item:
                                comp_item = dict()
                                comp_item['type'] = 'composite'
                                comp_item['guid'] = generate_guid(
                                    type=GUID_TAG)
                                comp_item['versioncreated'] = utcnow()
                                comp_item['groups'] = []
                                comp_item['headline'] = item['headline']

                                # create a reference to the item that stores the body of the email
                                item_ref = {}
                                item_ref['guid'] = item['guid']
                                item_ref['residRef'] = item['guid']
                                item_ref['headline'] = item['headline']
                                item_ref['location'] = 'ingest'
                                item_ref['itemClass'] = 'icls:text'
                                refs.append(item_ref)

                            media_item = dict()
                            media_item['guid'] = generate_guid(type=GUID_TAG)
                            media_item['versioncreated'] = utcnow()
                            media_item['type'] = 'picture'
                            media_item['renditions'] = renditions
                            media_item['mimetype'] = content_type
                            media_item['filemeta'] = metadata
                            media_item['slugline'] = fileName
                            # only the plain-text body seen so far is used
                            if text_body is not None:
                                media_item['body_html'] = text_body
                            media_item['headline'] = item['headline']
                            new_items.append(media_item)

                            # add a reference to this item in the composite item
                            media_ref = {}
                            media_ref['guid'] = media_item['guid']
                            media_ref['residRef'] = media_item['guid']
                            media_ref['headline'] = fileName
                            media_ref['location'] = 'ingest'
                            media_ref['itemClass'] = 'icls:picture'
                            refs.append(media_ref)

            if html_body is not None:
                item['body_html'] = html_body
            else:
                item['body_html'] = text_body
                item['type'] = 'preformatted'

            # if there is composite item then add the main group and references
            if comp_item:
                grefs = {}
                grefs['refs'] = [{'idRef': 'main'}]
                grefs['id'] = 'root'
                grefs['role'] = 'grpRole:NEP'
                comp_item['groups'].append(grefs)

                grefs = {}
                grefs['refs'] = refs
                grefs['id'] = 'main'
                grefs['role'] = 'grpRole:Main'
                comp_item['groups'].append(grefs)

                new_items.append(comp_item)

            new_items.append(item)
            return new_items
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)
Esempio n. 11
0
    def _update(self, provider, update):
        """Read the configured mailbox and parse the attachments of each message.

        Opens an IMAP-over-SSL connection to ``config['server']`` /
        ``config['port']`` (default 993), logs in, selects the mailbox and
        searches it. For every message that could be fetched, each attachment
        with a filename is handed to the provider's feed parser; the message
        is then explicitly flagged as seen.

        :param provider: ingest provider dict; settings come from ``config``.
        :param update: not used by this method.
        :return: list of parser results; empty when the mailbox cannot be
            selected or the search does not answer ``OK``.
        :raises IngestEmailError: on login failure (``emailLoginError``); any
            other exception is wrapped through ``emailError``.
        """
        config = provider.get('config', {})
        server = config.get('server', '')
        port = int(config.get('port', 993))
        # Bound up front so the final return is valid even when select or
        # search do not answer 'OK'.
        new_items = []

        try:
            imap = imaplib.IMAP4_SSL(host=server, port=port)
            try:
                try:
                    imap.login(config.get('user', None),
                               config.get('password', None))
                except imaplib.IMAP4.error as ex:
                    # Pass the caught instance so the server's message is kept.
                    raise IngestEmailError.emailLoginError(ex, provider)

                rv, _ = imap.select(config.get('mailbox', None), readonly=False)
                if rv == 'OK':
                    try:
                        # imaplib's search needs at least one criterion, so
                        # an empty filter falls back to unseen mail.
                        rv, found = imap.search(None, config.get('filter') or '(UNSEEN)')
                        if rv == 'OK':
                            for num in found[0].split():
                                rv, data = imap.fetch(num, '(RFC822)')
                                if rv != 'OK':
                                    continue
                                try:
                                    logger.info('Ingesting events from email')
                                    parser = self.get_feed_parser(provider, data)
                                    self._ingest_attachments(parser, data, provider, new_items)
                                    imap.store(num, '+FLAGS', '\\Seen')
                                except IngestEmailError:
                                    # Skip messages the parser rejects.
                                    continue
                    finally:
                        # Close the selected mailbox even if parsing blew up.
                        imap.close()
            finally:
                # Always end the session, including after a failed login.
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items

    def _ingest_attachments(self, parser, data, provider, new_items):
        """Parse every named attachment in ``data`` and append the results to ``new_items``."""
        for response_part in data:
            if not isinstance(response_part, tuple):
                continue
            if isinstance(response_part[1], bytes):
                msg = email.message_from_bytes(response_part[1])
            else:
                msg = email.message_from_string(response_part[1])
            # this will loop through all the available multiparts in email
            for part in msg.walk():
                # parse attached files only
                if part.get('Content-Disposition') is None:
                    continue
                fileName = part.get_filename()
                if not fileName:
                    continue
                attachment = part.get_payload(decode=True)
                content = io.BytesIO(attachment)
                res = process_file_from_stream(content, part.get_content_type())
                _file_name, content_type, _metadata = res
                logger.info('Ingesting events with {} parser'.format(parser.__class__.__name__))
                # The default keeps the lookup from raising, so parsers
                # without parse_email reach the fallback branch.
                if getattr(parser, 'parse_email', None):
                    try:
                        new_items.append(parser.parse_email(content, content_type, provider))
                    # NOTE(review): if parseMessageError is a factory
                    # classmethod rather than an exception class, this clause
                    # cannot catch anything - confirm against ParserError.
                    except ParserError.parseMessageError:
                        continue
                else:
                    new_items.append(parser.parse(data, provider))
Esempio n. 12
0
class EventEmailFeedingService(FeedingService):
    """
    Feeding Service class which reads events from the file attachments of
    emails found in a configured IMAP mailbox.

    The connection is made over SSL using the ``server``/``port`` from the
    provider config; messages are selected with the configured ``filter``
    (``(UNSEEN)`` when none is given) and flagged ``\\Seen`` once ingested.
    """

    NAME = 'event_email'
    ERRORS = [
        IngestEmailError.emailError().get_error_description(),
        IngestEmailError.emailLoginError().get_error_description()
    ]

    label = 'Event email'

    fields = [{
        'id': 'server',
        'type': 'text',
        'label': 'Email Server',
        'placeholder': 'Email Server',
        'required': True,
        'errors': {
            6003: 'Server not found.',
            6002: 'Unexpected server response'
        }
    }, {
        'id': 'port',
        'type': 'text',
        'label': 'Email Server Port',
        'placeholder': 'Email Server Port',
        'required': True,
        'default': '993'
    }, {
        'id': 'user',
        'type': 'text',
        'label': 'User',
        'placeholder': 'User',
        'required': True
    }, {
        'id': 'password',
        'type': 'password',
        'label': 'Password',
        'placeholder': 'Password',
        'required': True,
        'errors': {
            6000: 'Authentication error.'
        }
    }, {
        'id': 'mailbox',
        'type': 'text',
        'label': 'Mailbox',
        'placeholder': 'Mailbox',
        'required': True,
        'errors': {
            6004: 'Authentication error.'
        }
    }, {
        'id': 'formatted',
        'type': 'boolean',
        'label': 'Formatted Email Parser',
        'required': True
    }, {
        'id': 'filter',
        'type': 'text',
        'label': 'Filter',
        'placeholder': 'Filter',
        'required': True
    }]

    # Defines the collection service to be used with this ingest feeding service.
    service = 'events'

    def _update(self, provider, update):
        """Fetch matching emails and parse their attachments into items.

        :param provider: ingest provider; its ``config`` dict supplies
            ``server``, ``port``, ``user``, ``password``, ``mailbox`` and
            ``filter``
        :param update: not used by this implementation
        :return: list of whatever the feed parser returned for each
            attachment; empty when the mailbox cannot be selected or the
            search fails
        :raises IngestEmailError: ``emailLoginError`` when the login is
            rejected, ``emailError`` wrapping any other unexpected exception
        """
        config = provider.get('config', {})
        server = config.get('server', '')
        port = int(config.get('port', 993))
        # Bound up front so the final return is valid on every path.
        new_items = []

        try:
            imap = imaplib.IMAP4_SSL(host=server, port=port)
            try:
                try:
                    imap.login(config.get('user', None),
                               config.get('password', None))
                except imaplib.IMAP4.error as ex:
                    # Report the actual failure, not the exception class.
                    raise IngestEmailError.emailLoginError(ex, provider)

                rv, data = imap.select(config.get('mailbox', None),
                                       readonly=False)
                if rv == 'OK':
                    try:
                        rv, data = imap.search(
                            None, config.get('filter', '(UNSEEN)'))
                        if rv == 'OK':
                            for num in data[0].split():
                                self._ingest_message(imap, num, provider,
                                                     new_items)
                        else:
                            logger.warning(
                                'Email search failed with status %s', rv)
                    finally:
                        # close() is only issued once a mailbox is selected.
                        imap.close()
                else:
                    logger.warning(
                        'Unable to select mailbox, status %s', rv)
            finally:
                # Always release the connection, even when parsing failed.
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items

    def _ingest_message(self, imap, num, provider, new_items):
        """Fetch message ``num``, ingest its attachments and flag it as seen.

        A message whose fetch status is not ``OK`` is skipped. When an
        ``IngestEmailError`` is raised while handling the message it is
        abandoned without being flagged; items appended before the error
        stay in ``new_items``.
        """
        rv, data = imap.fetch(num, '(RFC822)')
        if rv != 'OK':
            return
        try:
            logger.info('Ingesting events from email')
            parser = self.get_feed_parser(provider, data)
            self._ingest_attachments(parser, data, provider, new_items)
            imap.store(num, '+FLAGS', '\\Seen')
        except IngestEmailError:
            return

    def _ingest_attachments(self, parser, data, provider, new_items):
        """Parse every named attachment in a fetch response into ``new_items``."""
        for response_part in data:
            if not isinstance(response_part, tuple):
                continue
            raw_message = response_part[1]
            if isinstance(raw_message, bytes):
                msg = email.message_from_bytes(raw_message)
            else:
                msg = email.message_from_string(raw_message)

            # this will loop through all the available multiparts in email
            for part in msg.walk():
                # parse attached files only
                if part.get('Content-Disposition') is None:
                    continue
                if not part.get_filename():
                    continue

                content = io.BytesIO(part.get_payload(decode=True))
                _, content_type, _ = process_file_from_stream(
                    content, part.get_content_type())
                logger.info('Ingesting events with {} parser'.format(
                    parser.__class__.__name__))

                # A default is required here: without it a parser lacking
                # parse_email raises AttributeError instead of falling back.
                parse_email = getattr(parser, 'parse_email', None)
                if parse_email:
                    try:
                        new_items.append(
                            parse_email(content, content_type, provider))
                    # NOTE(review): this class calls IngestEmailError members
                    # as factories (e.g. emailError()); if
                    # ParserError.parseMessageError is likewise a factory
                    # rather than an exception class this clause cannot
                    # match - confirm against superdesk.errors.
                    except ParserError.parseMessageError:
                        continue
                else:
                    new_items.append(parser.parse(data, provider))

    def prepare_href(self, href, mimetype=None):
        """Return the media URL for ``href`` as built by ``url_for_media``."""
        return url_for_media(href, mimetype)
Esempio n. 13
0
    def _parse_formatted_email(self, data, provider):
        """Construct an item from an email that was constructed as a notification from a google form submission.

        The google form submits to a google sheet, this sheet creates the email as a notification.
        The first ``text/plain`` part of the message is loaded as a JSON object whose
        values are sequences; only the first element of each value is used.

        :param data: fetch response; only tuple entries are parsed, with element ``[1]``
            taken as the raw message bytes
        :param provider: ingest provider, passed on to the raised error
        :return: A list of 1 item, or an empty list when the subject is not
            "Formatted Editorial Story"
        :raises IngestEmailError: ``emailParseError`` wrapping any exception raised while
            parsing, including ``UserNotRegisteredException`` when no user matches the sender
        """
        try:
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item["versioncreated"] = utcnow()
            for response_part in data:
                if not isinstance(response_part, tuple):
                    continue
                msg = email.message_from_bytes(response_part[1])
                # Check that the subject line matches what we expect, ignore it if not
                if self.parse_header(msg["subject"]) != "Formatted Editorial Story":
                    return []

                item["guid"] = msg["Message-ID"]
                date_tuple = email.utils.parsedate_tz(msg["Date"])
                if date_tuple:
                    dt = datetime.datetime.utcfromtimestamp(
                        email.utils.mktime_tz(date_tuple))
                    dt = dt.replace(tzinfo=timezone("utc"))
                    item["firstcreated"] = dt

                for part in msg.walk():
                    if part.get_content_type() != "text/plain":
                        continue

                    body = part.get_payload(decode=True)
                    charset = part.get_content_charset()
                    # if we don't know the charset just have a go!
                    decoded = body.decode() if charset is None else body.decode(charset)
                    json_str = decoded.replace("\r\n", "").replace("  ", " ")

                    mail_item = dict(
                        (k, v[0]) for k, v in json.loads(json_str).items())

                    self._expand_category(item, mail_item)

                    # "Username" wins over "Email Address" when both are present.
                    sender = mail_item.get(
                        "Username", mail_item.get("Email Address", ""))

                    item["original_source"] = sender
                    item["headline"] = mail_item.get("Headline", "")
                    item["abstract"] = mail_item.get("Abstract", "")
                    item["slugline"] = mail_item.get("Slugline", "")
                    item["body_html"] = "<p>" + mail_item.get(
                        "Body", "").replace("\n", "</p><p>") + "</p>"

                    default_source = app.config.get(
                        "DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES")
                    city = mail_item.get("Dateline", "")
                    city_lower = city.lower()
                    located = [
                        c for c in app.locators.find_cities()
                        if c["city"].lower() == city_lower
                    ]
                    item.setdefault("dateline", {})
                    # Fall back to a minimal UTC location when the city is unknown.
                    item["dateline"]["located"] = (
                        located[0] if located else {
                            "city_code": city,
                            "city": city,
                            "tz": "UTC",
                            "dateline": "city"
                        })
                    item["dateline"]["source"] = default_source
                    item["dateline"]["text"] = format_dateline_to_locmmmddsrc(
                        item["dateline"]["located"],
                        get_date(item["firstcreated"]),
                        source=default_source)

                    # A missing priority is treated as "3"; an empty one is left unset.
                    priority = mail_item.get("Priority", "3")
                    if priority != "":
                        if priority.isdigit():
                            item["priority"] = int(priority)
                        else:
                            # Non-numeric priorities are matched by name against
                            # the "priority" vocabulary; a missing vocabulary is
                            # treated as having no entries.
                            priority_map = superdesk.get_resource_service(
                                "vocabularies").find_one(
                                    req=None, _id="priority")
                            wanted = priority.upper()
                            priorities = [
                                x for x in (priority_map or {}).get("items", [])
                                if x["name"].upper() == wanted
                            ]
                            if priorities:
                                item["priority"] = int(
                                    priorities[0].get("qcode", "3"))
                            else:
                                item["priority"] = 3

                    news_value = mail_item.get("News Value", "3")
                    if news_value != "":
                        item["urgency"] = int(news_value)

                    # We expect the username passed corresponds to a superdesk user.
                    # The address comes from the email body, so it is escaped before
                    # being used as a pattern ('.' and '+' must match literally).
                    query = {
                        "email":
                        re.compile("^{}$".format(re.escape(sender)),
                                   re.IGNORECASE)
                    }
                    user = superdesk.get_resource_service(
                        "users").find_one(req=None, **query)
                    if not user:
                        logger.error(
                            "Failed to find user for email {}".format(sender))
                        raise UserNotRegisteredException()
                    item["original_creator"] = user.get("_id")
                    if BYLINE in user and user.get(BYLINE, ""):
                        item["byline"] = user.get(BYLINE)
                    item[SIGN_OFF] = user.get(SIGN_OFF)

                    # attempt to match the given desk name against the defined desks
                    # (escaped for the same reason as the email pattern above)
                    query = {
                        "name":
                        re.compile(
                            "^{}$".format(
                                re.escape(mail_item.get("Desk", ""))),
                            re.IGNORECASE)
                    }
                    desk = superdesk.get_resource_service(
                        "desks").find_one(req=None, **query)
                    if desk:
                        item["task"] = {
                            "desk": desk.get("_id"),
                            "stage": desk.get("incoming_stage")
                        }

                    if "Place" in mail_item:
                        locator_map = superdesk.get_resource_service(
                            "vocabularies").find_one(req=None,
                                                     _id="locators")
                        place_code = mail_item.get("Place", "").upper()
                        # An unmatched place yields an empty list, as before.
                        item["place"] = [
                            x for x in (locator_map or {}).get("items", [])
                            if x["qcode"] == place_code
                        ]

                    if mail_item.get("Legal flag", "") == "LEGAL":
                        item["flags"] = {"marked_for_legal": True}

                    # only the first text/plain part is used
                    break

            return [item]
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)
Esempio n. 14
0
    def parse(self, data, provider=None):
        """Turn a fetched email into a text item plus picture/composite items.

        When the provider config has ``formatted`` set, parsing is delegated to
        ``_parse_formatted_email``. Otherwise the mail body becomes a text item
        (the HTML body when there is one, else the plain text wrapped in
        ``<pre>``); every named image attachment other than GIF/PNG is stored
        through ``self.parser_app.media`` and becomes a picture item, and a
        composite item referencing the text and the pictures is added when at
        least one picture was stored.

        :param data: fetch response; only tuple entries are parsed, with
            element ``[1]`` taken as the raw message bytes
        :param provider: ingest provider; ``None`` is treated as having no config
        :return: list of items: pictures first, then the composite (if any),
            then the text item
        :raises IngestEmailError: ``emailParseError`` wrapping any exception
            raised while parsing
        """
        config = (provider or {}).get('config', {})
        # If the channel is configured to process structured email generated from a google form
        if config.get('formatted', False):
            return self._parse_formatted_email(data, provider)
        try:
            new_items = []
            # create an item for the body text of the email
            # either text or html
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()

            comp_item = None

            # a list to keep the references to the attachments
            refs = []

            html_body = None
            text_body = None

            for response_part in data:
                if not isinstance(response_part, tuple):
                    continue
                msg = email.message_from_bytes(response_part[1])
                item['headline'] = self.parse_header(msg['subject'])
                field_from = self.parse_header(msg['from'])
                item['original_source'] = field_from
                try:
                    addresses = email_regex.findall(field_from)
                    if addresses:
                        user = get_resource_service('users').get_user_by_email(addresses[0])
                        item['original_creator'] = user[eve.utils.config.ID_FIELD]
                except UserNotRegisteredException:
                    # an unknown sender simply gets no original_creator
                    pass
                item['guid'] = msg['Message-ID']
                date_tuple = email.utils.parsedate_tz(msg['Date'])
                if date_tuple:
                    dt = datetime.datetime.utcfromtimestamp(
                        email.utils.mktime_tz(date_tuple))
                    dt = dt.replace(tzinfo=timezone('utc'))
                    item['firstcreated'] = dt

                # this will loop through all the available multiparts in mail
                for part in msg.walk():
                    if part.get_content_type() == "text/plain":
                        body = part.get_payload(decode=True)
                        try:
                            charset = part.get_content_charset()
                            # if we don't know the charset just have a go!
                            text_body = body.decode() if charset is None else body.decode(charset)
                        except Exception as ex:
                            logger.exception(
                                "Exception parsing text body for {0} from {1}: {2}".format(item['headline'],
                                                                                           field_from, ex))
                        continue
                    if part.get_content_type() == "text/html":
                        body = part.get_payload(decode=True)
                        try:
                            charset = part.get_content_charset()
                            html_body = body.decode() if charset is None else body.decode(charset)
                            html_body = self.safe_html(html_body)
                        except Exception as ex:
                            logger.exception(
                                "Exception parsing html body for {0} from {1}: {2}".format(item['headline'],
                                                                                           field_from, ex))
                        continue
                    if part.get_content_maintype() == 'multipart':
                        continue
                    if part.get('Content-Disposition') is None:
                        continue
                    # we are only going to pull off image attachments at this stage
                    if part.get_content_maintype() != 'image':
                        continue

                    fileName = part.get_filename()
                    if not fileName:
                        continue

                    image = part.get_payload(decode=True)
                    content = io.BytesIO(image)
                    res = process_file_from_stream(content, part.get_content_type())
                    file_name, content_type, metadata = res
                    if content_type in ('image/gif', 'image/png'):
                        continue
                    # rewind before handing the stream to media storage
                    content.seek(0)
                    image_id = self.parser_app.media.put(content, filename=fileName,
                                                         content_type=content_type, metadata=metadata)
                    renditions = {'baseImage': {'href': image_id}}

                    # if we have not got a composite item then create one
                    if not comp_item:
                        comp_item = dict()
                        comp_item[ITEM_TYPE] = CONTENT_TYPE.COMPOSITE
                        comp_item['guid'] = generate_guid(type=GUID_TAG)
                        comp_item['versioncreated'] = utcnow()
                        comp_item['groups'] = []
                        comp_item['headline'] = item['headline']
                        comp_item['original_source'] = item['original_source']
                        if 'original_creator' in item:
                            comp_item['original_creator'] = item['original_creator']

                        # create a reference to the item that stores the body of the email
                        item_ref = {'guid': item['guid'], 'residRef': item['guid'],
                                    'headline': item['headline'], 'location': 'ingest',
                                    'itemClass': 'icls:text', 'original_source': item['original_source']}
                        if 'original_creator' in item:
                            item_ref['original_creator'] = item['original_creator']
                        refs.append(item_ref)

                    media_item = dict()
                    media_item['guid'] = generate_guid(type=GUID_TAG)
                    media_item['versioncreated'] = utcnow()
                    media_item[ITEM_TYPE] = CONTENT_TYPE.PICTURE
                    media_item['renditions'] = renditions
                    media_item['mimetype'] = content_type
                    set_filemeta(media_item, metadata)
                    media_item['slugline'] = fileName
                    if text_body is not None:
                        media_item['body_html'] = text_body
                    media_item['headline'] = item['headline']
                    media_item['original_source'] = item['original_source']
                    if 'original_creator' in item:
                        media_item['original_creator'] = item['original_creator']
                    new_items.append(media_item)

                    # add a reference to this item in the composite item
                    media_ref = {'guid': media_item['guid'], 'residRef': media_item['guid'],
                                 'headline': fileName, 'location': 'ingest', 'itemClass': 'icls:picture',
                                 'original_source': item['original_source']}
                    if 'original_creator' in item:
                        media_ref['original_creator'] = item['original_creator']
                    refs.append(media_ref)

            if html_body is not None:
                item['body_html'] = html_body
            else:
                # text_body stays None for mail without a decodable text part
                # (e.g. attachments only); use an empty body rather than
                # failing the whole message on str + None.
                item['body_html'] = '<pre>' + (text_body or '') + '</pre>'
                item[FORMAT] = FORMATS.PRESERVED

            # if there is composite item then add the main group and references
            if comp_item:
                grefs = {'refs': [{'idRef': 'main'}], 'id': 'root', 'role': 'grpRole:NEP'}
                comp_item['groups'].append(grefs)

                grefs = {'refs': refs, 'id': 'main', 'role': 'grpRole:Main'}
                comp_item['groups'].append(grefs)

                new_items.append(comp_item)

            new_items.append(item)
            return new_items
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)
Esempio n. 15
0
    def _parse_formatted_email(self, data, provider):
        """
        Passed an email that was constructed as a notification from a google form submission it constructs an item.
        The google form submits to a google sheet, this sheet creates the email as a notification.
        The first ``text/plain`` part of the message is loaded as a JSON object whose values are
        sequences; only the first element of each value is used.

        :param data: fetch response; only tuple entries are parsed, with element ``[1]`` taken as
            the raw message bytes
        :param provider: ingest provider, passed on to the raised error
        :return: A list of 1 item, or an empty list when the subject is not 'Formatted Editorial Story'
        :raises IngestEmailError: ``emailParseError`` wrapping any exception raised while parsing,
            including ``UserNotRegisteredException`` when no user matches the username
        """
        try:
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()
            for response_part in data:
                if not isinstance(response_part, tuple):
                    continue
                msg = email.message_from_bytes(response_part[1])
                # Check that the subject line matches what we expect, ignore it if not
                if self.parse_header(msg['subject']) != 'Formatted Editorial Story':
                    return []

                item['guid'] = msg['Message-ID']
                date_tuple = email.utils.parsedate_tz(msg['Date'])
                if date_tuple:
                    dt = datetime.datetime.utcfromtimestamp(
                        email.utils.mktime_tz(date_tuple))
                    dt = dt.replace(tzinfo=timezone('utc'))
                    item['firstcreated'] = dt

                for part in msg.walk():
                    if part.get_content_type() != "text/plain":
                        continue

                    body = part.get_payload(decode=True)
                    charset = part.get_content_charset()
                    # if we don't know the charset just have a go!
                    decoded = body.decode() if charset is None else body.decode(charset)
                    json_str = decoded.replace('\r\n', '').replace('  ', ' ')

                    mail_item = dict((k, v[0]) for k, v in json.loads(json_str).items())

                    self._expand_category(item, mail_item)

                    username = mail_item.get('Username')
                    item['original_source'] = username
                    item['headline'] = mail_item.get('Headline')
                    item['abstract'] = mail_item.get('Abstract')
                    item['slugline'] = mail_item.get('Slugline')
                    # a form without a Body yields an empty body instead of failing on None
                    item['body_html'] = mail_item.get('Body', '').replace('\n', '<br />')

                    # Absent and empty values are both treated as "not supplied";
                    # previously an absent key reached int(None).
                    priority = mail_item.get('Priority')
                    if priority not in (None, ''):
                        item['priority'] = int(priority)
                    urgency = mail_item.get('Urgency')
                    if urgency not in (None, ''):
                        item['urgency'] = int(urgency)

                    # We expect the username passed corresponds to a superdesk user.
                    # The value comes from the email body, so it is escaped before being
                    # used as a pattern ('.' and '+' must match literally).
                    query = {'email': re.compile('^{}$'.format(re.escape(str(username))), re.IGNORECASE)}
                    user = superdesk.get_resource_service('users').find_one(req=None, **query)
                    if not user:
                        logger.error('Failed to find user for email {}'.format(username))
                        raise UserNotRegisteredException()
                    item['original_creator'] = user.get('_id')
                    item['byline'] = user.get(BYLINE, user.get('display_name'))
                    item[SIGN_OFF] = user.get(SIGN_OFF)

                    # attempt to match the given desk name against the defined desks
                    desk = superdesk.get_resource_service('desks').find_one(
                        req=None, name=mail_item.get('Desk'))
                    if desk:
                        item['task'] = {'desk': desk.get('_id'), 'stage': desk.get('incoming_stage')}
                    # only the first text/plain part is used
                    break
            return [item]
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)
Esempio n. 16
0
# Copyright 2013, 2014 Sourcefabric z.u. and contributors.
#
# For the full copyright and license information, please see the
# AUTHORS and LICENSE files distributed with this source code, or
# at https://www.sourcefabric.org/superdesk/license

import imaplib
from .ingest_service import IngestService
from superdesk.io import register_provider
from superdesk.upload import url_for_media
from superdesk.errors import IngestEmailError

from superdesk.io.rfc822 import rfc822Parser

# Identifier string for this provider.
PROVIDER = 'email'
# Error descriptions for this provider, taken from the IngestEmailError
# instances returned by emailError() and emailLoginError().
errors = [IngestEmailError.emailError().get_error_description(),
          IngestEmailError.emailLoginError().get_error_description()]


class EmailReaderService(IngestService):

    def __init__(self):
        """Create the service with its own ``rfc822Parser`` instance."""
        # kept on the instance as ``self.parser``
        self.parser = rfc822Parser()

    def _update(self, provider):
        config = provider.get('config', {})
        server = config.get('server', '')
        port = int(config.get('port', 993))

        try:
            imap = imaplib.IMAP4_SSL(host=server, port=port)
Esempio n. 17
0
    def parse(self, data, provider=None):
        """Turn a fetched email into a list of ingest items.

        Only the tuple entries of ``data`` are processed; the second element
        of each one is parsed as the raw bytes of a message. When the provider
        config has a truthy ``formatted`` flag the work is delegated to
        ``_parse_formatted_email`` instead.

        Image attachments that carry a file name are stored through
        ``self.parser_app.media`` and become picture items, except those
        detected as GIF or PNG, which are skipped.

        :param data: iterable of response parts; non-tuple entries are ignored
        :param provider: ingest provider dict whose ``config`` is consulted
        :return: one picture item per stored attachment, then a composite
            item if at least one attachment was stored, then the text item
            holding the message body
        :raises IngestEmailError: ``emailParseError`` wrapping any exception
            raised while the message is processed
        """
        # ``provider`` defaults to None, so guard the config lookup
        config = (provider or {}).get('config', {})
        # If the channel is configured to process structured email generated from a google form
        if config.get('formatted', False):
            return self._parse_formatted_email(data, provider)
        try:
            new_items = []
            # create an item for the body text of the email
            # either text or html
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()

            comp_item = None

            # a list to keep the references to the attachments
            refs = []

            html_body = None
            text_body = None

            for response_part in data:
                if not isinstance(response_part, tuple):
                    continue

                msg = email.message_from_bytes(response_part[1])
                item['headline'] = self.parse_header(msg['subject'])
                field_from = self.parse_header(msg['from'])
                item['original_source'] = field_from
                try:
                    # scan the From header once and reuse the matches
                    addresses = email_regex.findall(field_from)
                    if addresses:
                        user = get_resource_service(
                            'users').get_user_by_email(addresses[0])
                        item['original_creator'] = user[
                            eve.utils.config.ID_FIELD]
                except UserNotRegisteredException:
                    # presumably raised when no user matches the address;
                    # the item is then simply left without a creator
                    pass
                item['guid'] = msg['Message-ID']
                date_tuple = email.utils.parsedate_tz(msg['Date'])
                if date_tuple:
                    dt = datetime.datetime.utcfromtimestamp(
                        email.utils.mktime_tz(date_tuple))
                    dt = dt.replace(tzinfo=timezone('utc'))
                    item['firstcreated'] = dt

                # this will loop through all the available multiparts in mail
                for part in msg.walk():
                    part_type = part.get_content_type()

                    if part_type == "text/plain":
                        body = part.get_payload(decode=True)
                        try:
                            # if we don't know the charset just have a go!
                            charset = part.get_content_charset()
                            if charset is None:
                                text_body = body.decode()
                            else:
                                text_body = body.decode(charset)
                        except Exception as ex:
                            logger.exception(
                                "Exception parsing text body for {0} from {1}: {2}"
                                .format(item['headline'], field_from, ex))
                        continue

                    if part_type == "text/html":
                        body = part.get_payload(decode=True)
                        try:
                            charset = part.get_content_charset()
                            if charset is None:
                                decoded = body.decode()
                            else:
                                decoded = body.decode(charset)
                            # assign only the sanitised markup: if sanitising
                            # fails the raw HTML must never reach body_html
                            html_body = self.safe_html(decoded)
                        except Exception as ex:
                            logger.exception(
                                "Exception parsing html body for {0} from {1}: {2}"
                                .format(item['headline'], field_from, ex))
                        continue

                    if part.get_content_maintype() == 'multipart':
                        continue
                    if part.get('Content-Disposition') is None:
                        continue
                    # we are only going to pull off image attachments at this stage
                    if part.get_content_maintype() != 'image':
                        continue

                    fileName = part.get_filename()
                    if not fileName:
                        continue

                    content = io.BytesIO(part.get_payload(decode=True))
                    _, content_type, metadata = process_file_from_stream(
                        content, part.get_content_type())
                    if content_type in ('image/gif', 'image/png'):
                        continue
                    # rewind before storing; the stream was handed to
                    # process_file_from_stream above
                    content.seek(0)
                    image_id = self.parser_app.media.put(
                        content,
                        filename=fileName,
                        content_type=content_type,
                        metadata=metadata)
                    renditions = {'baseImage': {'href': image_id}}

                    # if we have not got a composite item then create one
                    if comp_item is None:
                        comp_item = dict()
                        comp_item[ITEM_TYPE] = CONTENT_TYPE.COMPOSITE
                        comp_item['guid'] = generate_guid(type=GUID_TAG)
                        comp_item['versioncreated'] = utcnow()
                        comp_item['groups'] = []
                        comp_item['headline'] = item['headline']
                        comp_item['original_source'] = item['original_source']
                        if 'original_creator' in item:
                            comp_item['original_creator'] = item[
                                'original_creator']

                        # create a reference to the item that stores the body of the email
                        item_ref = {
                            'guid': item['guid'],
                            'residRef': item['guid'],
                            'headline': item['headline'],
                            'location': 'ingest',
                            'itemClass': 'icls:text',
                            'original_source': item['original_source']
                        }
                        if 'original_creator' in item:
                            item_ref['original_creator'] = item[
                                'original_creator']
                        refs.append(item_ref)

                    media_item = dict()
                    media_item['guid'] = generate_guid(type=GUID_TAG)
                    media_item['versioncreated'] = utcnow()
                    media_item[ITEM_TYPE] = CONTENT_TYPE.PICTURE
                    media_item['renditions'] = renditions
                    media_item['mimetype'] = content_type
                    set_filemeta(media_item, metadata)
                    media_item['slugline'] = fileName
                    # only a text part seen before this attachment is available here
                    if text_body is not None:
                        media_item['body_html'] = text_body
                    media_item['headline'] = item['headline']
                    media_item['original_source'] = item['original_source']
                    if 'original_creator' in item:
                        media_item['original_creator'] = item[
                            'original_creator']
                    new_items.append(media_item)

                    # add a reference to this item in the composite item
                    media_ref = {
                        'guid': media_item['guid'],
                        'residRef': media_item['guid'],
                        'headline': fileName,
                        'location': 'ingest',
                        'itemClass': 'icls:picture',
                        'original_source': item['original_source']
                    }
                    if 'original_creator' in item:
                        media_ref['original_creator'] = item[
                            'original_creator']
                    refs.append(media_ref)

            if html_body:
                item['body_html'] = html_body
            else:
                # a mail may have no text part at all (e.g. attachment only);
                # fall back to an empty body instead of failing on None
                # NOTE(review): the plain text is embedded without HTML
                # escaping - confirm whether consumers expect it escaped
                item['body_html'] = '<pre>' + (text_body or '') + '</pre>'
                item[FORMAT] = FORMATS.PRESERVED

            # if there is composite item then add the main group and references
            if comp_item:
                comp_item['groups'].append({
                    'refs': [{
                        'idRef': 'main'
                    }],
                    'id': 'root',
                    'role': 'grpRole:NEP'
                })
                comp_item['groups'].append({
                    'refs': refs,
                    'id': 'main',
                    'role': 'grpRole:Main'
                })

                new_items.append(comp_item)

            new_items.append(item)
            return new_items
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider) from ex
Esempio n. 18
0
class EmailFeedingService(FeedingService):
    """
    Feeding Service class which can read the article(s) from a configured mail box.

    The mailbox is reached over IMAP with SSL. ``_update`` searches the
    configured mailbox (unseen messages unless a filter is configured), hands
    every fetched message to the provider's feed parser and flags the
    messages that were parsed as seen.
    """

    NAME = "email"

    ERRORS = [
        IngestEmailError.emailError().get_error_description(),
        IngestEmailError.emailLoginError().get_error_description(),
    ]

    label = "Email"

    # Definitions of the provider configuration fields; the ``id`` values are
    # the keys read from ``provider["config"]`` below.
    fields = [
        {
            "id": "server",
            "type": "text",
            "label": l_("Email Server"),
            "placeholder": "Email Server",
            "required": True,
            "errors": {
                6003: "Server not found.",
                6002: "Unexpected server response"
            },
        },
        {
            "id": "port",
            "type": "text",
            "label": l_("Email Server Port"),
            "placeholder": "Email Server Port",
            "required": True,
            "default": "993",
        },
        {
            "id": "user",
            "type": "text",
            "label": l_("User"),
            "placeholder": "User",
            "required": True
        },
        {
            "id": "password",
            "type": "password",
            "label": l_("Password"),
            "placeholder": "Password",
            "required": True,
            "errors": {
                6000: "Authentication error."
            },
        },
        {
            "id": "mailbox",
            "type": "text",
            "label": l_("Mailbox"),
            "placeholder": "Mailbox",
            "required": True,
            "errors": {
                6004: "Authentication error."
            },
        },
        {
            "id": "formatted",
            "type": "boolean",
            "label": l_("Formatted Email Parser"),
            "required": True
        },
        {
            "id": "filter",
            "type": "text",
            "label": l_("Filter"),
            "placeholder": "Filter",
            "required": False
        },
    ]

    def _test(self, provider):
        """Run ``_update`` in test mode; the result is discarded."""
        self._update(provider, update=None, test=True)

    def authenticate(self, provider: dict, config: dict) -> imaplib.IMAP4_SSL:
        """Connect to the configured IMAP server over SSL and log in.

        :param provider: ingest provider dict, passed on to raised errors
        :param config: provider config; ``server``, ``port`` (default 993),
            ``user`` and ``password`` are read from it
        :return: the logged-in IMAP connection
        :raises IngestEmailError: ``emailHostError`` when the connection
            cannot be opened, ``emailLoginError`` when the login is rejected
        """
        server = config.get("server", "")
        port = int(config.get("port", 993))
        try:
            # NOTE: this changes the default timeout of every socket created
            # afterwards in the process, not only this connection's
            socket.setdefaulttimeout(app.config.get("EMAIL_TIMEOUT", 10))
            imap = imaplib.IMAP4_SSL(host=server, port=port)
        except (socket.gaierror, OSError) as e:
            raise IngestEmailError.emailHostError(exception=e,
                                                  provider=provider)

        try:
            imap.login(config.get("user", None), config.get("password", None))
        except imaplib.IMAP4.error as e:
            # hand over the caught instance (not the exception class) so the
            # server's rejection message is kept
            raise IngestEmailError.emailLoginError(e, provider)

        return imap

    def _update(self, provider, update, test=False):
        """Fetch the matching messages and parse them into ingest items.

        :param provider: ingest provider dict; its ``config`` supplies the
            ``mailbox`` and the optional search ``filter``
        :param update: not used by this method
        :param test: when true the messages are still fetched, but nothing is
            parsed or flagged and the mailbox is opened read-only
        :return: list with one entry per parsed message, each being whatever
            the feed parser's ``parse`` returned
        :raises IngestEmailError: specific errors are re-raised unchanged,
            any other exception is wrapped in ``emailError``
        """
        config = provider.get("config", {})
        new_items = []

        try:
            imap = self.authenticate(provider, config)

            try:
                # Fetching RFC822 implicitly sets \Seen on a mailbox that is
                # selected read-write. Open it read-only for a test run so
                # that testing the configuration does not consume mail that
                # was never ingested.
                rv, data = imap.select(config.get("mailbox", None),
                                       readonly=bool(test))
                if rv != "OK":
                    raise IngestEmailError.emailMailboxError()
                try:
                    # the filter field is optional: treat a blank or missing
                    # value as "unseen messages only"
                    criteria = config.get("filter") or "(UNSEEN)"
                    rv, data = imap.search(None, criteria)
                    if rv != "OK":
                        raise IngestEmailError.emailFilterError()
                    for num in data[0].split():
                        rv, message = imap.fetch(num, "(RFC822)")
                        if rv != "OK" or test:
                            continue
                        try:
                            parser = self.get_feed_parser(provider, message)
                            new_items.append(parser.parse(message, provider))
                            imap.store(num, "+FLAGS", "\\Seen")
                        except IngestEmailError:
                            # best effort: one message that cannot be parsed
                            # must not stop the remaining ones
                            continue
                finally:
                    imap.close()
            finally:
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items

    def prepare_href(self, href, mimetype=None):
        """Return ``url_for_media(href, mimetype)``."""
        return url_for_media(href, mimetype)
Esempio n. 19
0
    def parse(self, data, provider=None):
        """Turn a fetched email into a list of ingest items.

        Only the tuple entries of ``data`` are processed; the second element
        of each one is parsed as the raw bytes of a message. When the provider
        config has a truthy ``formatted`` flag the work is delegated to
        ``_parse_formatted_email`` instead.

        Image attachments that carry a file name are stored through
        ``self.parser_app.media`` and become picture items, except those
        detected as GIF or PNG, which are skipped.

        :param data: iterable of response parts; non-tuple entries are ignored
        :param provider: ingest provider dict whose ``config`` is consulted
        :return: one picture item per stored attachment, then a composite
            item if at least one attachment was stored, then the text item
            holding the message body
        :raises IngestEmailError: ``emailParseError`` wrapping any exception
            raised while the message is processed
        """
        # ``provider`` defaults to None, so guard the config lookup
        config = (provider or {}).get("config", {})
        # If the channel is configured to process structured email generated from a google form
        if config.get("formatted", False):
            return self._parse_formatted_email(data, provider)
        try:
            new_items = []
            # create an item for the body text of the email
            # either text or html
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item["versioncreated"] = utcnow()

            comp_item = None

            # a list to keep the references to the attachments
            refs = []

            html_body = None
            text_body = None

            for response_part in data:
                if not isinstance(response_part, tuple):
                    continue

                msg = email.message_from_bytes(response_part[1])
                item["headline"] = self.parse_header(msg["subject"])
                field_from = self.parse_header(msg["from"])
                item["original_source"] = field_from
                try:
                    # scan the From header once and reuse the matches
                    addresses = email_regex.findall(field_from)
                    if addresses:
                        user = get_resource_service(
                            "users").get_user_by_email(addresses[0])
                        item["original_creator"] = user[
                            eve.utils.config.ID_FIELD]
                except UserNotRegisteredException:
                    # presumably raised when no user matches the address;
                    # the item is then simply left without a creator
                    pass
                item["guid"] = msg["Message-ID"]
                date_tuple = email.utils.parsedate_tz(msg["Date"])
                if date_tuple:
                    dt = datetime.datetime.utcfromtimestamp(
                        email.utils.mktime_tz(date_tuple))
                    dt = dt.replace(tzinfo=timezone("utc"))
                    item["firstcreated"] = dt

                # this will loop through all the available multiparts in mail
                for part in msg.walk():
                    part_type = part.get_content_type()

                    if part_type == "text/plain":
                        body = part.get_payload(decode=True)
                        try:
                            # if we don't know the charset just have a go!
                            charset = part.get_content_charset()
                            if charset is None:
                                text_body = body.decode()
                            else:
                                text_body = body.decode(charset)
                        except Exception as ex:
                            logger.exception(
                                "Exception parsing text body for {0} from {1}: {2}"
                                .format(item["headline"], field_from, ex))
                        continue

                    if part_type == "text/html":
                        body = part.get_payload(decode=True)
                        try:
                            charset = part.get_content_charset()
                            if charset is None:
                                decoded = body.decode()
                            else:
                                decoded = body.decode(charset)
                            # assign only the sanitised markup: if sanitising
                            # fails the raw HTML must never reach body_html
                            html_body = sanitize_html(decoded)
                        except Exception as ex:
                            logger.exception(
                                "Exception parsing html body for {0} from {1}: {2}"
                                .format(item["headline"], field_from, ex))
                        continue

                    if part.get_content_maintype() == "multipart":
                        continue
                    if part.get("Content-Disposition") is None:
                        continue
                    # we are only going to pull off image attachments at this stage
                    if part.get_content_maintype() != "image":
                        continue

                    fileName = part.get_filename()
                    if not fileName:
                        continue

                    content = io.BytesIO(part.get_payload(decode=True))
                    _, content_type, metadata = process_file_from_stream(
                        content, part.get_content_type())
                    if content_type in ("image/gif", "image/png"):
                        continue
                    # rewind before storing; the stream was handed to
                    # process_file_from_stream above
                    content.seek(0)
                    image_id = self.parser_app.media.put(
                        content,
                        filename=fileName,
                        content_type=content_type,
                        metadata=metadata)
                    renditions = {"baseImage": {"href": image_id}}

                    # if we have not got a composite item then create one
                    if comp_item is None:
                        comp_item = dict()
                        comp_item[ITEM_TYPE] = CONTENT_TYPE.COMPOSITE
                        comp_item["guid"] = generate_guid(type=GUID_TAG)
                        comp_item["versioncreated"] = utcnow()
                        comp_item["groups"] = []
                        comp_item["headline"] = item["headline"]
                        comp_item["original_source"] = item["original_source"]
                        if "original_creator" in item:
                            comp_item["original_creator"] = item[
                                "original_creator"]

                        # create a reference to the item that stores the body of the email
                        item_ref = {
                            "guid": item["guid"],
                            "residRef": item["guid"],
                            "headline": item["headline"],
                            "location": "ingest",
                            "itemClass": "icls:text",
                            "original_source": item["original_source"],
                        }
                        if "original_creator" in item:
                            item_ref["original_creator"] = item[
                                "original_creator"]
                        refs.append(item_ref)

                    media_item = dict()
                    media_item["guid"] = generate_guid(type=GUID_TAG)
                    media_item["versioncreated"] = utcnow()
                    media_item[ITEM_TYPE] = CONTENT_TYPE.PICTURE
                    media_item["renditions"] = renditions
                    media_item["mimetype"] = content_type
                    set_filemeta(media_item, metadata)
                    media_item["slugline"] = fileName
                    # only a text part seen before this attachment is available here
                    if text_body is not None:
                        media_item["body_html"] = text_body
                    media_item["headline"] = item["headline"]
                    media_item["original_source"] = item["original_source"]
                    if "original_creator" in item:
                        media_item["original_creator"] = item[
                            "original_creator"]
                    new_items.append(media_item)

                    # add a reference to this item in the composite item
                    media_ref = {
                        "guid": media_item["guid"],
                        "residRef": media_item["guid"],
                        "headline": fileName,
                        "location": "ingest",
                        "itemClass": "icls:picture",
                        "original_source": item["original_source"],
                    }
                    if "original_creator" in item:
                        media_ref["original_creator"] = item[
                            "original_creator"]
                    refs.append(media_ref)

            if html_body:
                item["body_html"] = html_body
            else:
                # a mail may have no text part at all (e.g. attachment only);
                # fall back to an empty body instead of failing on None
                # NOTE(review): the plain text is embedded without HTML
                # escaping - confirm whether consumers expect it escaped
                item["body_html"] = "<pre>" + (text_body or "") + "</pre>"
                item[FORMAT] = FORMATS.PRESERVED

            # if there is composite item then add the main group and references
            if comp_item:
                comp_item["groups"].append({
                    "refs": [{
                        "idRef": "main"
                    }],
                    "id": "root",
                    "role": "grpRole:NEP"
                })
                comp_item["groups"].append({
                    "refs": refs,
                    "id": "main",
                    "role": "grpRole:Main"
                })

                new_items.append(comp_item)

            new_items.append(item)
            return new_items
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider) from ex
Esempio n. 20
0
class GMailFeedingService(EmailFeedingService):
    """
    Feeding Service class which can read the article(s) from a configured mail box.

    Variant of the email feeding service that connects to ``imap.gmail.com``
    and authenticates with an OAuth2 token looked up by the provider id.
    """

    NAME = "gmail"

    ERRORS = [
        IngestEmailError.emailError().get_error_description(),
        IngestEmailError.emailLoginError().get_error_description(),
    ]

    label = "Gmail"

    # Definitions of the provider configuration fields. The ``url`` of the two
    # ``url_request`` fields is filled in by ``init_app``.
    fields = [
        {
            "id": "email",
            "type": "text",
            "label": l_("email"),
            "readonly": True,
            "show_expression": "provider.config['email'] != null",
        },
        {
            "id": "log_in_url",
            "type": "url_request",
            "label": l_("Log-in with GMail"),
            # provider._id != null              provider has to be saved before trying to log in
            # provider.config['email'] == null  do not display log-in button if logged-in already
            "show_expression":
            "provider._id != null && provider.config['email'] == null",
        },
        {
            "id": "log_out_url",
            "type": "url_request",
            "label": l_("Log-out"),
            # provider.config['email'] != null  only display log-out button if already logged in
            "show_expression": "provider.config['email'] != null",
        },
        {
            "id": "mailbox",
            "type": "text",
            "label": l_("Mailbox"),
            "default_value": "INBOX",
            "placeholder": l_("Mailbox"),
            "required": True,
            "errors": {
                6004: "Authentication error."
            },
        },
        {
            "id": "filter",
            "type": "text",
            "label": l_("Filter"),
            "placeholder": "Filter",
            "required": False
        },
    ]

    @classmethod
    def init_app(cls, app):
        """Set the ``url`` of the log-in and log-out fields from ``SERVER_URL``.

        The field dicts in ``cls.fields`` are modified in place.
        """
        # we need to access config to set the URL, so we do it here
        for field_id, action in (("log_in_url", "login"),
                                 ("log_out_url", "logout")):
            field = next(f for f in cls.fields if f["id"] == field_id)
            field["url"] = join(app.config["SERVER_URL"], action, "google",
                                "{PROVIDER_ID}")

    def _test(self, provider):
        """Run ``_update`` in test mode; the result is discarded."""
        self._update(provider, update=None, test=True)

    def authenticate(self, provider: dict, config: dict) -> imaplib.IMAP4_SSL:
        """Open an IMAP connection to GMail authenticated through XOAUTH2.

        The token is looked up in the ``oauth2_token`` resource by the
        provider's ``_id`` and refreshed when it expires within ten minutes.

        :param provider: ingest provider dict (``_id`` and ``name`` are read)
        :param config: provider config; not used by this method
        :return: the authenticated IMAP connection
        :raises IngestEmailError: ``notConfiguredError`` when no token is
            stored for the provider
        """
        oauth2_token_service = superdesk.get_resource_service("oauth2_token")
        token = oauth2_token_service.find_one(req=None,
                                              _id=ObjectId(provider["_id"]))
        if token is None:
            raise IngestEmailError.notConfiguredError(
                ValueError(l_("You need to log in first")),
                provider=provider)

        # refresh before connecting, so that a failing refresh does not leave
        # an IMAP connection open behind it
        if token["expires_at"].timestamp() < time.time() + 600:
            logger.info("Refreshing token for {provider_name}".format(
                provider_name=provider["name"]))
            token = oauth.refresh_google_token(token["_id"])

        imap = imaplib.IMAP4_SSL("imap.gmail.com")
        auth_string = "user={email}\x01auth=Bearer {token}\x01\x01".format(
            email=token["email"], token=token["access_token"])
        imap.authenticate("XOAUTH2", lambda __: auth_string.encode())
        return imap

    def parse_extra(self, imap: imaplib.IMAP4_SSL, num: str,
                    parsed_items: List[dict]) -> None:
        """Add GMail labels to parsed_items

        Every label of message ``num`` is appended to the ``subject`` list of
        each parsed item as ``{"name", "qcode", "scheme": "gmail_label"}``.
        Failures are logged and never propagated.
        """
        try:
            # we use GMail IMAP Extensions
            # https://developers.google.com/gmail/imap/imap-extensions#access_to_gmail_labels_x-gm-labels
            _, data = imap.fetch(num, "(X-GM-LABELS)")
            # it seems that there is nothing to help parsing in standard lib
            # thus we use some regex to get our labels
            data_bytes = data[0]
            if not isinstance(data_bytes, bytes):
                raise ValueError(f"Unexpected data type: {type(data_bytes)}")
            # NOTE(review): IMAP normally uses *modified* UTF-7, which the
            # "utf-7" codec does not implement - confirm with non-ASCII labels
            data_str = data_bytes.decode("utf-7")
            match_labels_str = RE_LABELS_STR.search(data_str)
            if match_labels_str is None:
                # ``!r`` is the repr conversion; ``:r`` would be an invalid
                # format spec and mask this message with a formatting error
                raise ValueError(
                    f"Can't find the expected label string in data: {data_str!r}"
                )
            labels_str = match_labels_str.group(1)
            labels = [(m.group("quoted")
                       or m.group("unquoted")).replace('\\"', '"')
                      for m in RE_LABEL.finditer(labels_str)]
            for parsed_item in parsed_items:
                subjects = parsed_item.setdefault("subject", [])
                subjects.extend({
                    "name": label,
                    "qcode": label,
                    "scheme": "gmail_label"
                } for label in labels)
        except Exception:
            # labels are optional extras: log and carry on without them
            logger.exception("Can't retrieve GMail labels")
Esempio n. 21
0
class GMailFeedingService(EmailFeedingService):
    """
    Feeding Service class which can read the article(s) from a configured mail box.

    Connects to ``imap.gmail.com`` and authenticates with XOAUTH2, using the
    token stored in the ``oauth2_token`` resource under the provider's
    ``url_id``.
    """

    NAME = "gmail"

    ERRORS = [
        IngestEmailError.emailError().get_error_description(),
        IngestEmailError.emailLoginError().get_error_description(),
    ]

    label = "Gmail"

    # configuration fields; the "url_request" entry gets its "url" key
    # filled in by init_app
    fields = [
        {
            "type": "url_request",
            "label": l_("Log-in with GMail"),
        },
        {
            "id": "email",
            "type": "string",
            "label": l_("email"),
            "readonly": True,
            "placeholder": l_(
                "This field will be automatically filled once you've logged using log-in button above"
            ),
        },
        {
            "id": "mailbox",
            "type": "text",
            "label": l_("Mailbox"),
            "default_value": "INBOX",
            "placeholder": l_("Mailbox"),
            "required": True,
            "errors": {
                6004: "Authentication error."
            },
        },
        {
            "id": "filter",
            "type": "text",
            "label": l_("Filter"),
            "placeholder": "Filter",
            "required": False,
        },
    ]

    @classmethod
    def init_app(cls, app):
        """Set the log-in URL of the ``url_request`` field from ``app.config["SERVER_URL"]``."""
        # we need to access config to set the URL, so we do it here
        field = next(f for f in cls.fields if f["type"] == "url_request")
        field["url"] = join(app.config["SERVER_URL"], "login", "google",
                            "{URL_ID}")

    def _test(self, provider):
        """Run ``_update`` for ``provider`` in test mode."""
        self._update(provider, update=None, test=True)

    def authenticate(self, provider: dict, config: dict) -> imaplib.IMAP4_SSL:
        """Open an IMAP connection to GMail authenticated with XOAUTH2.

        :param provider: ingest provider; ``url_id`` selects the stored token
            and ``name`` is used for logging
        :param config: provider configuration (not used here)
        :return: the authenticated connection
        :raises IngestEmailError: when no token is stored for the provider
        """
        oauth2_token_service = superdesk.get_resource_service("oauth2_token")
        token = oauth2_token_service.find_one(req=None, _id=provider["url_id"])
        if token is None:
            raise IngestEmailError.notConfiguredError(
                ValueError(l_("You need to log in first")), provider=provider)
        imap = imaplib.IMAP4_SSL("imap.gmail.com")

        try:
            # refresh when the token expires within the next 600 seconds
            if token["expires_at"].timestamp() < time.time() + 600:
                logger.info("Refreshing token for {provider_name}".format(
                    provider_name=provider["name"]))
                token = oauth.refresh_google_token(token["_id"])

            auth_string = "user={email}\x01auth=Bearer {token}\x01\x01".format(
                email=token["email"], token=token["access_token"])
            # the callback ignores the server challenge and always answers
            # with the encoded auth string
            imap.authenticate("XOAUTH2", lambda __: auth_string.encode())
        except Exception:
            # the connection is already open at this point: close it so a
            # failed refresh or handshake does not leak the socket
            imap.shutdown()
            raise
        return imap
Esempio n. 22
0
    def parse_email(self, data, provider):
        """Build ingest items from a fetched email.

        The message body becomes one item: ``text`` with the sanitised HTML
        body when an HTML part was decoded, otherwise ``preformatted`` with
        the plain text body. Every named image attachment other than gif/png
        becomes a ``picture`` item, and when at least one picture exists a
        ``composite`` item referencing the body item and the pictures is
        added as well.

        :param data: response parts; only tuple entries are processed and
            their second element is parsed as the raw message bytes
        :param provider: passed to the raised error on failure
        :return: list with the picture items, then the composite item (if
            any), then the body item
        :raises IngestEmailError: any exception is wrapped with
            ``emailParseError``
        """
        try:
            new_items = []
            # create an item for the body text of the email
            # either text or html
            item = dict()
            item['type'] = 'text'
            item['versioncreated'] = utcnow()

            comp_item = None

            # a list to keep the references to the attachments
            refs = []

            html_body = None
            text_body = None

            for response_part in data:
                if not isinstance(response_part, tuple):
                    continue

                msg = email.message_from_bytes(response_part[1])
                item['headline'] = self.parse_header(msg['subject'])
                item['original_creator'] = self.parse_header(msg['from'])
                item['guid'] = msg['Message-ID']
                date_tuple = email.utils.parsedate_tz(msg['Date'])
                if date_tuple:
                    dt = datetime.datetime.utcfromtimestamp(email.utils.mktime_tz(date_tuple))
                    dt = dt.replace(tzinfo=timezone('utc'))
                    item['firstcreated'] = dt

                # this will loop through all the available multiparts in mail
                for part in msg.walk():
                    part_type = part.get_content_type()

                    if part_type == "text/plain":
                        body = part.get_payload(decode=True)
                        try:
                            charset = part.get_content_charset()
                            # if we don't know the charset just have a go!
                            text_body = body.decode() if charset is None else body.decode(charset)
                        except Exception:
                            # logger.exception already records the active
                            # exception; passing it as an extra argument
                            # would break the message formatting
                            logger.exception("Exception parsing text body for %s from %s",
                                             item['headline'], item['original_creator'])
                        continue

                    if part_type == "text/html":
                        body = part.get_payload(decode=True)
                        try:
                            charset = part.get_content_charset()
                            decoded = body.decode() if charset is None else body.decode(charset)
                            # assign only after sanitising, so a failure in
                            # safe_html never leaves raw HTML in html_body
                            html_body = self.safe_html(decoded)
                        except Exception:
                            logger.exception("Exception parsing text html for %s from %s",
                                             item['headline'], item['original_creator'])
                        continue

                    if part.get_content_maintype() == 'multipart':
                        continue
                    if part.get('Content-Disposition') is None:
                        continue
                    # we are only going to pull off image attachments at this stage
                    if part.get_content_maintype() != 'image':
                        continue

                    fileName = part.get_filename()
                    if not fileName:
                        continue

                    image = part.get_payload(decode=True)
                    content = io.BytesIO(image)
                    res = process_file_from_stream(content, part.get_content_type())
                    _, content_type, metadata = res
                    if content_type == 'image/gif' or content_type == 'image/png':
                        continue
                    # rewind before storing (presumably the stream was
                    # consumed by process_file_from_stream)
                    content.seek(0)
                    image_id = self.parser_app.media.put(content, filename=fileName,
                                                         content_type=content_type, metadata=metadata)
                    renditions = {'baseImage': {'href': image_id}}

                    # if we have not got a composite item then create one
                    if not comp_item:
                        comp_item = dict()
                        comp_item['type'] = 'composite'
                        comp_item['guid'] = generate_guid(type=GUID_TAG)
                        comp_item['versioncreated'] = utcnow()
                        comp_item['groups'] = []
                        comp_item['headline'] = item['headline']

                        # create a reference to the item that stores the body of the email
                        item_ref = {}
                        item_ref['guid'] = item['guid']
                        item_ref['residRef'] = item['guid']
                        item_ref['headline'] = item['headline']
                        item_ref['location'] = 'ingest'
                        item_ref['itemClass'] = 'icls:text'
                        refs.append(item_ref)

                    media_item = dict()
                    media_item['guid'] = generate_guid(type=GUID_TAG)
                    media_item['versioncreated'] = utcnow()
                    media_item['type'] = 'picture'
                    media_item['renditions'] = renditions
                    media_item['mimetype'] = content_type
                    media_item['filemeta'] = metadata
                    media_item['slugline'] = fileName
                    if text_body is not None:
                        media_item['body_html'] = text_body
                    media_item['headline'] = item['headline']
                    new_items.append(media_item)

                    # add a reference to this item in the composite item
                    media_ref = {}
                    media_ref['guid'] = media_item['guid']
                    media_ref['residRef'] = media_item['guid']
                    media_ref['headline'] = fileName
                    media_ref['location'] = 'ingest'
                    media_ref['itemClass'] = 'icls:picture'
                    refs.append(media_ref)

            if html_body is not None:
                item['body_html'] = html_body
            else:
                item['body_html'] = text_body
                item['type'] = 'preformatted'

            # if there is composite item then add the main group and references
            if comp_item:
                grefs = {}
                grefs['refs'] = [{'idRef': 'main'}]
                grefs['id'] = 'root'
                grefs['role'] = 'grpRole:NEP'
                comp_item['groups'].append(grefs)

                grefs = {}
                grefs['refs'] = refs
                grefs['id'] = 'main'
                grefs['role'] = 'grpRole:Main'
                comp_item['groups'].append(grefs)

                new_items.append(comp_item)

            new_items.append(item)
            return new_items
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)
Esempio n. 23
0
    def _parse_formatted_email(self, data, provider):
        """Construct an item from an email that was constructed as a notification from a google form submission.

        The google form submits to a google sheet, this sheet creates the email as a notification.
        The first ``text/plain`` part is expected to hold a JSON object whose
        values are lists; the first element of each list is used.

        :param data: response parts; only tuple entries are processed and
            their second element is parsed as the raw message bytes
        :param provider: passed to the raised error on failure
        :return: A list of 1 item, or an empty list when the subject is not
            'Formatted Editorial Story'
        :raises IngestEmailError: any exception (including an unknown user)
            is wrapped with ``emailParseError``
        """
        try:
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()
            for response_part in data:
                if not isinstance(response_part, tuple):
                    continue

                msg = email.message_from_bytes(response_part[1])
                # Check that the subject line matches what we expect, ignore it if not
                if self.parse_header(msg['subject']) != 'Formatted Editorial Story':
                    return []

                item['guid'] = msg['Message-ID']
                date_tuple = email.utils.parsedate_tz(msg['Date'])
                if date_tuple:
                    dt = datetime.datetime.utcfromtimestamp(
                        email.utils.mktime_tz(date_tuple))
                    dt = dt.replace(tzinfo=timezone('utc'))
                    item['firstcreated'] = dt

                for part in msg.walk():
                    if part.get_content_type() != "text/plain":
                        continue

                    body = part.get_payload(decode=True)
                    charset = part.get_content_charset()
                    # if we don't know the charset just have a go!
                    text = body.decode() if charset is None else body.decode(charset)
                    json_str = text.replace('\r\n', '').replace('  ', ' ')

                    mail_item = dict((k, v[0]) for k, v in json.loads(json_str).items())

                    self._expand_category(item, mail_item)

                    item['original_source'] = mail_item.get('Username', '')
                    item['headline'] = mail_item.get('Headline', '')
                    item['abstract'] = mail_item.get('Abstract', '')
                    item['slugline'] = mail_item.get('Slugline', '')
                    item['body_html'] = '<p>' + mail_item.get('Body', '').replace('\n', '</p><p>') + '</p>'

                    default_source = app.config.get('DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES')
                    city = mail_item.get('Dateline', '')
                    cities = app.locators.find_cities()
                    located = [c for c in cities if c['city'].lower() == city.lower()]
                    item.setdefault('dateline', {})
                    # fall back to a minimal location when the city is unknown
                    item['dateline']['located'] = located[0] if len(located) > 0 else {'city_code': city,
                                                                                       'city': city,
                                                                                       'tz': 'UTC',
                                                                                       'dateline': 'city'}
                    item['dateline']['source'] = default_source
                    item['dateline']['text'] = format_dateline_to_locmmmddsrc(item['dateline']['located'],
                                                                              get_date(item['firstcreated']),
                                                                              source=default_source)

                    if mail_item.get('Priority') != '':
                        priority = mail_item.get('Priority', '3')
                        if priority.isdigit():
                            item['priority'] = int(priority)
                        else:
                            # not a number: look the name up in the priority vocabulary
                            priority_map = superdesk.get_resource_service('vocabularies').find_one(
                                req=None, _id='priority')
                            priorities = [x for x in priority_map.get('items', []) if
                                          x['name'].upper() == priority.upper()]
                            if priorities:
                                item['priority'] = int(priorities[0].get('qcode', '3'))
                            else:
                                item['priority'] = 3
                    if mail_item.get('News Value') != '':
                        item['urgency'] = int(mail_item.get('News Value', '3'))

                    # We expect the username passed corresponds to a superdesk user.
                    # The value comes from the email body, so escape it: addresses
                    # may contain regex metacharacters such as '+' or '.'
                    username = mail_item.get('Username')
                    query = {'email': re.compile('^{}$'.format(re.escape(str(username))), re.IGNORECASE)}
                    user = superdesk.get_resource_service('users').find_one(req=None, **query)
                    if not user:
                        logger.error('Failed to find user for email {}'.format(username))
                        raise UserNotRegisteredException()
                    item['original_creator'] = user.get('_id')
                    if BYLINE in user and user.get(BYLINE, ''):
                        item['byline'] = user.get(BYLINE)
                    item[SIGN_OFF] = user.get(SIGN_OFF)

                    # attempt to match the given desk name against the defined desks
                    # (escaped for the same reason as the username)
                    desk_name = re.escape(str(mail_item.get('Desk', '')))
                    query = {'name': re.compile('^{}$'.format(desk_name), re.IGNORECASE)}
                    desk = superdesk.get_resource_service('desks').find_one(
                        req=None, **query)
                    if desk:
                        item['task'] = {'desk': desk.get('_id'), 'stage': desk.get('incoming_stage')}

                    if 'Place' in mail_item:
                        locator_map = superdesk.get_resource_service('vocabularies').find_one(req=None,
                                                                                              _id='locators')
                        # may be an empty list when no locator matches
                        item['place'] = [x for x in locator_map.get('items', []) if
                                         x['qcode'] == mail_item.get('Place', '').upper()]

                    if mail_item.get('Legal flag', '') == 'LEGAL':
                        item['flags'] = {'marked_for_legal': True}

                    # only the first text/plain part is used
                    break

            return [item]
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)
Esempio n. 24
0
    def _parse_formatted_email(self, data, provider):
        """Construct an item from an email that was constructed as a notification from a google form submission.

        The google form submits to a google sheet, this sheet creates the email as a notification.
        The first ``text/plain`` part is expected to hold a JSON object whose
        values are lists; the first element of each list is used.

        :param data: response parts; only tuple entries are processed and
            their second element is parsed as the raw message bytes
        :param provider: passed to the raised error on failure
        :return: A list of 1 item, or an empty list when the subject is not
            'Formatted Editorial Story'
        :raises IngestEmailError: any exception (including an unknown user)
            is wrapped with ``emailParseError``
        """
        try:
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()
            for response_part in data:
                if not isinstance(response_part, tuple):
                    continue

                msg = email.message_from_bytes(response_part[1])
                # Check that the subject line matches what we expect, ignore it if not
                subject = self.parse_header(msg['subject'])
                if subject != 'Formatted Editorial Story':
                    return []

                item['guid'] = msg['Message-ID']
                date_tuple = email.utils.parsedate_tz(msg['Date'])
                if date_tuple:
                    dt = datetime.datetime.utcfromtimestamp(
                        email.utils.mktime_tz(date_tuple))
                    dt = dt.replace(tzinfo=timezone('utc'))
                    item['firstcreated'] = dt

                for part in msg.walk():
                    if part.get_content_type() != "text/plain":
                        continue

                    body = part.get_payload(decode=True)
                    charset = part.get_content_charset()
                    # if we don't know the charset just have a go!
                    if charset is None:
                        text = body.decode()
                    else:
                        text = body.decode(charset)
                    json_str = text.replace('\r\n', '').replace('  ', ' ')

                    mail_item = dict(
                        (k, v[0]) for k, v in json.loads(json_str).items())

                    self._expand_category(item, mail_item)

                    item['original_source'] = mail_item.get('Username', '')
                    item['headline'] = mail_item.get('Headline', '')
                    item['abstract'] = mail_item.get('Abstract', '')
                    item['slugline'] = mail_item.get('Slugline', '')
                    item['body_html'] = '<p>' + mail_item.get(
                        'Body', '').replace('\n', '</p><p>') + '</p>'

                    default_source = app.config.get(
                        'DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES')
                    city = mail_item.get('Dateline', '')
                    cities = app.locators.find_cities()
                    located = [
                        c for c in cities
                        if c['city'].lower() == city.lower()
                    ]
                    item.setdefault('dateline', {})
                    # fall back to a minimal location when the city is unknown
                    item['dateline']['located'] = located[0] if len(
                        located) > 0 else {
                            'city_code': city,
                            'city': city,
                            'tz': 'UTC',
                            'dateline': 'city'
                        }
                    item['dateline']['source'] = default_source
                    item['dateline']['text'] = format_dateline_to_locmmmddsrc(
                        item['dateline']['located'],
                        get_date(item['firstcreated']),
                        source=default_source)

                    if mail_item.get('Priority') != '':
                        priority = mail_item.get('Priority', '3')
                        if priority.isdigit():
                            item['priority'] = int(priority)
                        else:
                            # not a number: look the name up in the
                            # priority vocabulary
                            priority_map = superdesk.get_resource_service(
                                'vocabularies').find_one(
                                    req=None, _id='priority')
                            priorities = [
                                x for x in priority_map.get('items', [])
                                if x['name'].upper() == priority.upper()
                            ]
                            if priorities:
                                item['priority'] = int(
                                    priorities[0].get('qcode', '3'))
                            else:
                                item['priority'] = 3
                    if mail_item.get('News Value') != '':
                        item['urgency'] = int(
                            mail_item.get('News Value', '3'))

                    # We expect the username passed corresponds to a superdesk user.
                    # The value comes from the email body, so escape it:
                    # addresses may contain regex metacharacters such as
                    # '+' or '.'
                    username = mail_item.get('Username')
                    query = {
                        'email':
                        re.compile('^{}$'.format(re.escape(str(username))),
                                   re.IGNORECASE)
                    }
                    user = superdesk.get_resource_service('users').find_one(
                        req=None, **query)
                    if not user:
                        logger.error(
                            'Failed to find user for email {}'.format(
                                username))
                        raise UserNotRegisteredException()
                    item['original_creator'] = user.get('_id')
                    if BYLINE in user and user.get(BYLINE, ''):
                        item['byline'] = user.get(BYLINE)
                    item[SIGN_OFF] = user.get(SIGN_OFF)

                    # attempt to match the given desk name against the defined desks
                    # (escaped for the same reason as the username)
                    desk_name = re.escape(str(mail_item.get('Desk', '')))
                    query = {
                        'name':
                        re.compile('^{}$'.format(desk_name), re.IGNORECASE)
                    }
                    desk = superdesk.get_resource_service('desks').find_one(
                        req=None, **query)
                    if desk:
                        item['task'] = {
                            'desk': desk.get('_id'),
                            'stage': desk.get('incoming_stage')
                        }

                    if 'Place' in mail_item:
                        locator_map = superdesk.get_resource_service(
                            'vocabularies').find_one(req=None,
                                                     _id='locators')
                        # may be an empty list when no locator matches
                        item['place'] = [
                            x for x in locator_map.get('items', [])
                            if x['qcode'] == mail_item.get('Place',
                                                           '').upper()
                        ]

                    if mail_item.get('Legal flag', '') == 'LEGAL':
                        item['flags'] = {'marked_for_legal': True}

                    # only the first text/plain part is used
                    break

            return [item]
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)
Esempio n. 25
0
    def _parse_formatted_email(self, data, provider):
        """
        Passed an email that was constructed as a notification from a google form submission it constructs an item.
        The google form submits to a google sheet, this sheet creates the email as a notification.
        The first ``text/plain`` part is expected to hold a JSON object whose
        values are lists; the first element of each list is used.

        :param data: response parts; only tuple entries are processed and
            their second element is parsed as the raw message bytes
        :param provider: passed to the raised error on failure
        :return: A list of 1 item, or an empty list when the subject is not
            'Formatted Editorial Story'
        :raises IngestEmailError: any exception (including an unknown user)
            is wrapped with ``emailParseError``
        """
        try:
            item = dict()
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            item['versioncreated'] = utcnow()
            for response_part in data:
                if not isinstance(response_part, tuple):
                    continue

                msg = email.message_from_bytes(response_part[1])
                # Check that the subject line matches what we expect, ignore it if not
                subject = self.parse_header(msg['subject'])
                if subject != 'Formatted Editorial Story':
                    return []

                item['guid'] = msg['Message-ID']
                date_tuple = email.utils.parsedate_tz(msg['Date'])
                if date_tuple:
                    dt = datetime.datetime.utcfromtimestamp(
                        email.utils.mktime_tz(date_tuple))
                    dt = dt.replace(tzinfo=timezone('utc'))
                    item['firstcreated'] = dt

                for part in msg.walk():
                    if part.get_content_type() != "text/plain":
                        continue

                    body = part.get_payload(decode=True)
                    charset = part.get_content_charset()
                    # if we don't know the charset just have a go!
                    if charset is None:
                        text = body.decode()
                    else:
                        text = body.decode(charset)
                    json_str = text.replace('\r\n', '').replace('  ', ' ')

                    mail_item = dict(
                        (k, v[0]) for k, v in json.loads(json_str).items())

                    self._expand_category(item, mail_item)

                    item['original_source'] = mail_item.get('Username')
                    item['headline'] = mail_item.get('Headline')
                    item['abstract'] = mail_item.get('Abstract')
                    item['slugline'] = mail_item.get('Slugline')
                    item['body_html'] = mail_item.get('Body').replace(
                        '\n', '<br />')

                    if mail_item.get('Priority') != '':
                        item['priority'] = int(mail_item.get('Priority'))
                    if mail_item.get('Urgency') != '':
                        item['urgency'] = int(mail_item.get('Urgency'))

                    # We expect the username passed corresponds to a superdesk user.
                    # The value comes from the email body, so escape it:
                    # addresses may contain regex metacharacters such as
                    # '+' or '.'
                    username = mail_item.get('Username')
                    query = {
                        'email':
                        re.compile('^{}$'.format(re.escape(str(username))),
                                   re.IGNORECASE)
                    }
                    user = superdesk.get_resource_service('users').find_one(
                        req=None, **query)
                    if not user:
                        logger.error(
                            'Failed to find user for email {}'.format(
                                username))
                        raise UserNotRegisteredException()
                    item['original_creator'] = user.get('_id')
                    item['byline'] = user.get(BYLINE,
                                              user.get('display_name'))
                    item[SIGN_OFF] = user.get(SIGN_OFF)

                    # attempt to match the given desk name against the defined desks
                    desk = superdesk.get_resource_service('desks').find_one(
                        req=None, name=mail_item.get('Desk'))
                    if desk:
                        item['task'] = {
                            'desk': desk.get('_id'),
                            'stage': desk.get('incoming_stage')
                        }
                    # only the first text/plain part is used
                    break
            return [item]
        except Exception as ex:
            raise IngestEmailError.emailParseError(ex, provider)
Esempio n. 26
0
class EmailFeedingService(FeedingService):
    """
    Feeding Service class which can read the article(s) from a configured mail box.

    The service opens an IMAP-over-SSL connection to the configured server,
    selects the configured mailbox, searches it with the configured filter
    and passes every fetched message to the feed parser.
    """

    NAME = 'email'

    ERRORS = [
        IngestEmailError.emailError().get_error_description(),
        IngestEmailError.emailLoginError().get_error_description()
    ]

    label = 'Email'

    # Configuration fields of the provider. An ``errors`` mapping attaches a
    # message to a numeric error code for the field it belongs to.
    fields = [{
        'id': 'server',
        'type': 'text',
        'label': 'Email Server',
        'placeholder': 'Email Server',
        'required': True,
        'errors': {
            6003: 'Server not found.',
            6002: 'Unexpected server response'
        }
    }, {
        'id': 'port',
        'type': 'text',
        'label': 'Email Server Port',
        'placeholder': 'Email Server Port',
        'required': True,
        'default': '993'
    }, {
        'id': 'user',
        'type': 'text',
        'label': 'User',
        'placeholder': 'User',
        'required': True
    }, {
        'id': 'password',
        'type': 'password',
        'label': 'Password',
        'placeholder': 'Password',
        'required': True,
        'errors': {
            6000: 'Authentication error.'
        }
    }, {
        'id': 'mailbox',
        'type': 'text',
        'label': 'Mailbox',
        'placeholder': 'Mailbox',
        'required': True,
        'errors': {
            # NOTE(review): same wording as the password field's error;
            # confirm this is the intended message for a mailbox problem.
            6004: 'Authentication error.'
        }
    }, {
        'id': 'formatted',
        'type': 'boolean',
        'label': 'Formatted Email Parser',
        'required': True
    }, {
        'id': 'filter',
        'type': 'text',
        'label': 'Filter',
        'placeholder': 'Filter',
        'required': True
    }]

    def _test(self, provider):
        """Run ``_update`` for ``provider`` in test mode (nothing is parsed)."""
        self._update(provider, update=None, test=True)

    def _update(self, provider, update, test=False):
        """
        Fetch the messages matching the configured filter and parse them.

        :param provider: ingest provider dict; its ``config`` entry supplies
            ``server``, ``port``, ``user``, ``password``, ``mailbox`` and
            ``filter`` (defaults to ``(UNSEEN)``).
        :param update: not used by this implementation.
        :param test: when true the mailbox is opened read-only and messages
            are fetched but neither parsed nor flagged.
        :return: list holding one ``parser.parse`` result per ingested message.
        :raises IngestEmailError: for host, login, mailbox and filter
            failures; any other exception raised while talking to the server
            is wrapped with ``IngestEmailError.emailError``.
        """
        config = provider.get('config', {})
        server = config.get('server', '')
        port = int(config.get('port', 993))
        new_items = []

        try:
            try:
                socket.setdefaulttimeout(app.config.get('EMAIL_TIMEOUT', 10))
                imap = imaplib.IMAP4_SSL(host=server, port=port)
            except (socket.gaierror, OSError) as e:
                raise IngestEmailError.emailHostError(exception=e)

            try:
                imap.login(config.get('user', None),
                           config.get('password', None))
            except imaplib.IMAP4.error as login_error:
                # Release the connection before reporting the failure. This is
                # best effort: a failing logout must not hide the login error.
                try:
                    imap.logout()
                except (imaplib.IMAP4.error, OSError):
                    pass
                # Hand over the caught instance (not the exception class) so
                # the server's message is preserved.
                raise IngestEmailError.emailLoginError(login_error, provider)

            try:
                # Fetching RFC822 implicitly sets \Seen on a read-write
                # mailbox; a read-only select keeps a configuration test from
                # hiding unseen mail from the next real ingest.
                rv, data = imap.select(config.get('mailbox', None),
                                       readonly=bool(test))
                if rv != 'OK':
                    raise IngestEmailError.emailMailboxError()
                try:
                    rv, data = imap.search(None,
                                           config.get('filter', '(UNSEEN)'))
                    if rv != 'OK':
                        raise IngestEmailError.emailFilterError()
                    for num in data[0].split():
                        rv, message = imap.fetch(num, '(RFC822)')
                        if rv != 'OK' or test:
                            continue
                        try:
                            parser = self.get_feed_parser(provider, message)
                            new_items.append(parser.parse(message, provider))
                            imap.store(num, '+FLAGS', '\\Seen')
                        except IngestEmailError:
                            # A message that cannot be ingested is skipped so
                            # the remaining ones are still processed.
                            continue
                finally:
                    imap.close()
            finally:
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items

    def prepare_href(self, href, mimetype=None):
        """Return the result of ``url_for_media`` for ``href`` and ``mimetype``."""
        return url_for_media(href, mimetype)
Esempio n. 27
0
#
# For the full copyright and license information, please see the
# AUTHORS and LICENSE files distributed with this source code, or
# at https://www.sourcefabric.org/superdesk/license

import imaplib
from .ingest_service import IngestService
from superdesk.io import register_provider
from superdesk.upload import url_for_media
from superdesk.errors import IngestEmailError

from superdesk.io.rfc822 import rfc822Parser

# Provider name constant for the email ingest service.
PROVIDER = 'email'
# Error descriptions associated with this provider.
errors = [
    make_error().get_error_description()
    for make_error in (
        IngestEmailError.emailError,
        IngestEmailError.emailLoginError,
    )
]


class EmailReaderService(IngestService):
    def __init__(self):
        """Create the service and store a new ``rfc822Parser`` on ``self.parser``."""
        self.parser = rfc822Parser()

    def _update(self, provider):
        config = provider.get('config', {})
        server = config.get('server', '')
        port = int(config.get('port', 993))

        try:
            imap = imaplib.IMAP4_SSL(host=server, port=port)