Esempio n. 1
0
def _unquotevalue(value):
    """Unquote a parameter value without converting it to unicode.

    This differs from utils.collapse_rfc2231_value(): Message.get_param()
    and Message.get_params() are both currently defined to return the tuple
    in the face of RFC 2231 parameters, so a tuple stays a tuple and only
    its third item is unquoted.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    # RFC 2231 style tuple: the first two items are passed through untouched.
    head, middle, quoted = value[0], value[1], value[2]
    return head, middle, utils.unquote(quoted)
Esempio n. 2
0
def _unquotevalue(value):
    """Return ``value`` with its quoting removed, keeping the tuple form.

    Not the same as utils.collapse_rfc2231_value(): no conversion to unicode
    is attempted, because Message.get_param() and Message.get_params() are
    both currently defined to return the tuple for RFC 2231 parameters.
    """
    if isinstance(value, tuple):
        # Only the third item of the tuple carries the quoted text.
        unquoted = utils.unquote(value[2])
        return value[0], value[1], unquoted
    return utils.unquote(value)
Esempio n. 3
0
    def from_mbox(self, mbfile):
        """ Process all the emails of a mbox file for upload into the
        database using kittystore API.

        For every message: the optional ``self.since`` date filter is
        applied, a wrapped subject line is un-wrapped, the list display name
        is taken from the subject prefix of the first message read, a
        Message-Id already present in the list gets a random suffix when
        ``self.force_import`` is set, and the attachments are extracted.

        :arg mbfile, a mailbox file from which the emails are extracted and
        upload to the database.
        """
        # cnt_imported is initialised here but not updated in this portion.
        cnt_imported = 0
        cnt_read = 0
        for message in mailbox.mbox(mbfile):
            if self.since:
                date = message["date"]
                # Messages without a date header are not filtered out.
                if date:
                    try:
                        date = awarify(parse(date))
                    except ValueError as e:
                        print("Can't parse date string in message %s: %s"
                              % (message["message-id"], date))
                        print(e)
                        continue
                    if date < self.since:
                        continue
            cnt_read = cnt_read + 1
            self.total_imported += 1
            if self.verbose:
                print("%s (%d)" % (message["Message-Id"], self.total_imported))
            # Un-wrap the subject line if necessary
            if message["subject"]:
                message.replace_header(
                    "subject", TEXTWRAP_RE.sub(" ", message["subject"]))
            # Try to find the mailing-list subject prefix in the first email.
            # The subject header can be missing (None); searching it would
            # raise a TypeError, so only search when there is a subject.
            if cnt_read == 1 and message["subject"]:
                subject_prefix = PREFIX_RE.search(message["subject"])
                if subject_prefix:
                    self.mlist.display_name = unicode(subject_prefix.group(1))
            if self.force_import:
                # Keep appending a random suffix until the id is unused.
                while self.store.is_message_in_list(
                        self.mlist.fqdn_listname,
                        unquote(message["Message-Id"])):
                    oldmsgid = message["Message-Id"]
                    message.replace_header(
                        "Message-Id", "<%s-%s>" %
                        (unquote(message["Message-Id"]), str(randint(0, 100))))
                    print(
                        "Found duplicate, changing message id from %s to %s" %
                        (oldmsgid, message["Message-Id"]))
            # Parse message to search for attachments
            try:
                attachments = self.extract_attachments(message)
            except DownloadError as e:
                print(
                    "Could not download one of the attachments! "
                    "Skipping this message. Error: %s" % e.args[0])
                continue
Esempio n. 4
0
    def from_mbox(self, mbfile):
        """ Process all the emails of a mbox file for upload into the
        database using kittystore API.

        :arg mbfile, a mailbox file from which the emails are extracted and
        upload to the database.
        """
        # NOTE(review): presumably make_delayed() defers search-index updates
        # during the bulk import -- confirm against its definition.
        self.store.search_index = make_delayed(self.store.search_index)
        # cnt_imported is initialised here but not updated in this portion.
        cnt_imported = 0
        cnt_read = 0
        for message in mailbox.mbox(mbfile):
            # Optional date filter: skip messages dated before self.since and
            # messages whose date cannot be parsed. Messages without a date
            # header are not filtered out.
            if self.since:
                date = message["date"]
                if date:
                    try:
                        date = awarify(parse(date))
                    except ValueError, e:
                        print "Can't parse date string in message %s: %s" \
                              % (message["message-id"], date)
                        print e
                        continue
                    if date < self.since:
                        continue
            cnt_read = cnt_read + 1
            self.total_imported += 1
            if self.verbose:
                print "%s (%d)" % (message["Message-Id"], self.total_imported)
            # Un-wrap the subject line if necessary
            if message["subject"]:
                message.replace_header("subject",
                        TEXTWRAP_RE.sub(" ", message["subject"]))
            # Try to find the mailing-list subject prefix: only while the
            # list has no prefix yet and the message actually has a subject.
            if not self.mlist.subject_prefix and message["subject"]:
                subject_prefix = PREFIX_RE.search(message["subject"])
                if subject_prefix:
                    self.mlist.subject_prefix = unicode(subject_prefix.group(1))
            if self.force_import:
                # While the Message-Id is already present in the list, append
                # a random suffix to it and check again.
                while self.store.is_message_in_list(
                            self.mlist.fqdn_listname,
                            unquote(message["Message-Id"])):
                    oldmsgid = message["Message-Id"]
                    message.replace_header("Message-Id",
                            "<%s-%s>" % (unquote(message["Message-Id"]),
                                         str(randint(0, 100))))
                    print("Found duplicate, changing message id from %s to %s"
                          % (oldmsgid, message["Message-Id"]))
            # Parse message to search for attachments; a message whose
            # attachments cannot be downloaded is skipped.
            try:
                attachments = self.extract_attachments(message)
            except DownloadError, e:
                print ("Could not download one of the attachments! "
                       "Skipping this message. Error: %s" % e.args[0])
                continue
Esempio n. 5
0
    def from_mbox(self, mbfile):
        """ Upload all the emails in a mbox file into the database using
        kittystore API.

        For every message: a wrapped subject line is un-wrapped, the list
        display name is taken from the subject prefix of the first message
        read, a Message-Id already present in the list gets a random suffix
        when ``self.force_import`` is set, the attachments are extracted and
        the message is added to the store with ``self.store.add_to_list``.

        :arg mbfile, a mailbox file from which the emails are extracted and
        upload to the database.
        """
        # cnt_imported is initialised here but not updated in this portion.
        cnt_imported = 0
        cnt_read = 0
        for message in mailbox.mbox(mbfile):
            cnt_read = cnt_read + 1
            self.total_imported += 1
            # Un-wrap the subject line if necessary
            if message["subject"]:
                message.replace_header(
                    "subject", TEXTWRAP_RE.sub(" ", message["subject"]))
            # Try to find the mailing-list subject prefix in the first email.
            # The subject header can be missing (None); searching it would
            # raise a TypeError, so only search when there is a subject.
            if cnt_read == 1 and message["subject"]:
                subject_prefix = PREFIX_RE.search(message["subject"])
                if subject_prefix:
                    self.mlist.display_name = unicode(subject_prefix.group(1))
            if self.force_import:
                # Keep appending a random suffix until the id is unused.
                while self.store.is_message_in_list(
                        self.mlist.fqdn_listname,
                        unquote(message["Message-Id"])):
                    oldmsgid = message["Message-Id"]
                    message.replace_header(
                        "Message-Id",
                        "<%s-%s>" % (unquote(message["Message-Id"]),
                                     str(randint(0, 100))))
                    print("Found duplicate, changing message id from %s to %s"
                          % (oldmsgid, message["Message-Id"]))
            # Parse message to search for attachments
            try:
                attachments = self.extract_attachments(message)
            except DownloadError as e:
                print("Could not download one of the attachments! "
                      "Skipping this message. Error: %s" % e.args[0])
                continue
            # Now insert the message
            try:
                self.store.add_to_list(self.mlist, message)
            except ValueError as e:
                # Only the two-argument form (description, message) is
                # reported and skipped; anything else is re-raised.
                if len(e.args) != 2:
                    raise  # Regular ValueError exception
                print("%s from %s about %s" % (
                    e.args[0],
                    e.args[1].get("From"), e.args[1].get("Subject")))
                continue
Esempio n. 6
0
    def from_mbox(self, mbfile):
        """
        Insert all the emails contained in an mbox file into the database.

        Messages rejected by ``self._is_too_old`` are skipped. Each remaining
        message is added with ``add_to_list`` inside its own
        ``transaction.atomic()`` block; duplicates, two-argument ValueErrors
        and database errors are reported on ``self.stderr`` and the message
        is skipped. The thread id of every imported email is recorded in
        ``self.impacted_thread_ids``.

        :arg mbfile: a mailbox file
        """
        mbox = mailbox.mbox(mbfile)
        progress_marker = ProgressMarker(self.verbose, self.stdout)
        # The total is only set when no ``since`` filter is active.
        if not self.since:
            progress_marker.total = len(mbox)
        for message in mbox:
            if self._is_too_old(message):
                continue
            progress_marker.tick(message["Message-Id"])
            # Un-wrap the subject line if necessary
            if message["subject"]:
                message.replace_header(
                    "subject", TEXTWRAP_RE.sub(" ", message["subject"]))
            # Copy the mbox "From " line, when present, into the unixfrom
            # attribute of the message.
            if message.get_from():
                message.set_unixfrom(message.get_from())
            # Now insert the message
            try:
                with transaction.atomic():
                    add_to_list(self.list_address, message)
            except DuplicateMessage as e:
                if self.verbose:
                    self.stderr.write("Duplicate email with message-id '%s'" %
                                      e.args[0])
                continue
            except ValueError as e:
                self.stderr.write("Failed adding message %s: %s" %
                                  (message.get("Message-ID"), e))
                # Only the two-argument form is handled here; its second
                # argument is used like a message (``.get("From")``).
                if len(e.args) != 2:
                    raise  # Regular ValueError exception
                try:
                    self.stderr.write("%s from %s about %s" %
                                      (e.args[0], e.args[1].get("From"),
                                       e.args[1].get("Subject")))
                except UnicodeDecodeError:
                    # Best effort: the details line is dropped when it cannot
                    # be formatted.
                    pass
                continue
            except DatabaseError:
                try:
                    print_exc(file=self.stderr)
                except UnicodeError:
                    # Best effort: the traceback is dropped when it cannot be
                    # written.
                    pass
                self.stderr.write("Message %s failed to import, skipping" %
                                  unquote(message["Message-Id"]))
                continue
            # Fetch the stored email back to learn which thread it joined.
            email = Email.objects.get(mailinglist__name=self.list_address,
                                      message_id=get_message_id(message))
            # # Commit every time to be able to rollback on error
            # if not transaction.get_autocommit():
            #     transaction.commit()
            # Store the list of impacted threads to be able to compute the
            # thread_order and thread_depth values
            self.impacted_thread_ids.add(email.thread_id)
            progress_marker.count_imported += 1
        # self.store.search_index.flush() # Now commit to the search index
        progress_marker.finish()
Esempio n. 7
0
def parse_header_value(header: str) -> Tuple[str, Dict[str, str]]:
    """
    Split an HTTP header value into its main value and its parameters.

    Every parameter value is unquoted.
    A parameter whose key ends with an asterisk (``*``) is stored under the key without the
    asterisk, and its value is decoded according to :rfc:`2231`.

    :param header: the header value to parse
    :return: a tuple of (main value, params dict)

    """
    assert check_argument_types()
    main_value, _, params_str = header.partition(';')
    params = {}
    for match in header_param_re.finditer(params_str):
        key, raw_value = match.groups()
        unquoted = unquote(raw_value)
        if not key.endswith('*'):
            params[key] = unquoted
            continue

        # Extended parameter: drop the asterisk and decode the value with the
        # encoding reported by decode_rfc2231()
        encoding, encoded = decode_rfc2231(unquoted)[::2]
        params[key[:-1]] = urllib_unquote(encoded, encoding)

    return main_value.rstrip(), params
Esempio n. 8
0
 def sender(self):
     """Return the (name, address) pair parsed from the "from" header.

     When the unquoted name starts with "=?" it is passed through
     ``self._decode``; otherwise the name is returned as parsed.
     """
     display_name, address = parseaddr(self._get("from"))
     stripped = unquote(display_name)
     if stripped.startswith("=?"):
         return self._decode(stripped), address
     return display_name, address
Esempio n. 9
0
    def __init__(self, attachment, encoding):
        # An attachment is either a MIMEBase object or a tuple of
        # (filename, content, mimetype); filename and mimetype may both be missing.
        self._attachment = attachment
        self.encoding = encoding  # should we be checking attachment["Content-Encoding"] ???
        self.inline = False
        self.content_id = None
        self.cid = ""

        if not isinstance(attachment, MIMEBase):
            self.name, self.content, self.mimetype = attachment
        else:
            self.name = attachment.get_filename()
            self.content = attachment.get_payload(decode=True)
            self.mimetype = attachment.get_content_type()

            if get_content_disposition(attachment) == 'inline':
                self.inline = True
                raw_content_id = attachment["Content-ID"]  # probably including the <...>
                self.content_id = raw_content_id
                if raw_content_id is not None:
                    self.cid = unquote(raw_content_id)  # without the <, >

        # Fill in a missing mimetype: first guess from the filename (borrowed from
        # django.core.mail.EmailMessage._create_attachment()), then use the default.
        if self.mimetype is None:
            if self.name is not None:
                self.mimetype, _ = mimetypes.guess_type(self.name)
            if self.mimetype is None:
                self.mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
Esempio n. 10
0
    def get(self):
        """Run integrated_lookup on the 'ref' request argument.

        The reference is unquoted and stripped first; the optional 'getall'
        and 'getboth' arguments (default False) are forwarded as
        ``return_all`` and ``return_both``.
        """
        args = self.parser.parse_args()
        reference = unquote(args['ref']).strip()
        return integrated_lookup(
            reference,
            return_all=args.get('getall', False),
            return_both=args.get('getboth', False))
Esempio n. 11
0
    def __init__(self, attachment, encoding):
        # ``attachment`` may be a MIMEBase object or a
        # (filename, content, mimetype) tuple, where both filename and
        # mimetype may be missing.
        self._attachment = attachment
        self.encoding = encoding  # should we be checking attachment["Content-Encoding"] ???
        self.inline = False
        self.content_id = None
        self.cid = ""

        if isinstance(attachment, MIMEBase):
            self.name = attachment.get_filename()
            self.content = attachment.get_payload(decode=True)
            self.mimetype = attachment.get_content_type()

            if get_content_disposition(attachment) == 'inline':
                self.inline = True
                # Header value, probably including the <...>
                self.content_id = attachment["Content-ID"]
                # cid is the same id without the <, > (empty when absent)
                self.cid = (
                    "" if self.content_id is None
                    else unquote(self.content_id))
        else:
            (self.name, self.content, self.mimetype) = attachment

        # Guess missing mimetype from filename, borrowed from
        # django.core.mail.EmailMessage._create_attachment()
        name_known = self.name is not None
        if self.mimetype is None and name_known:
            guessed_type, _ = mimetypes.guess_type(self.name)
            self.mimetype = guessed_type
        if self.mimetype is None:
            self.mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
Esempio n. 12
0
    def from_mbox(self, mbfile):
        """
        Insert all the emails contained in an mbox file into the database.

        Messages rejected by ``self._is_too_old`` are skipped. Each remaining
        message is added with ``add_to_list`` inside its own
        ``transaction.atomic()`` block; duplicates, two-argument ValueErrors
        and database errors are reported on ``self.stderr`` and the message
        is skipped. The thread id of every imported email is recorded in
        ``self.impacted_thread_ids``.

        :arg mbfile: a mailbox file
        """
        #self.store.search_index = make_delayed(self.store.search_index)
        mbox = mailbox.mbox(mbfile)
        progress_marker = ProgressMarker(self.verbose, self.stdout)
        # The total is only set when no ``since`` filter is active.
        if not self.since:
            progress_marker.total = len(mbox)
        for message in mbox:
            if self._is_too_old(message):
                continue
            progress_marker.tick(message["Message-Id"])
            # Un-wrap the subject line if necessary
            if message["subject"]:
                message.replace_header("subject",
                        TEXTWRAP_RE.sub(" ", message["subject"]))
            # Now insert the message
            try:
                with transaction.atomic():
                    add_to_list(self.list_address, message)
            except DuplicateMessage as e:
                if self.verbose:
                    self.stderr.write(
                        "Duplicate email with message-id '%s'" % e.args[0])
                continue
            except ValueError as e:
                # Only the two-argument form is handled here; its second
                # argument is used like a message (``.get("From")``).
                if len(e.args) != 2:
                    raise # Regular ValueError exception
                try:
                    self.stderr.write("%s from %s about %s"
                        % (e.args[0], e.args[1].get("From"),
                           e.args[1].get("Subject")))
                except UnicodeDecodeError:
                    # Fall back to reporting the message-id when From/Subject
                    # cannot be formatted.
                    self.stderr.write("%s with message-id %s"
                        % (e.args[0], e.args[1].get("Message-ID")))
                continue
            except DatabaseError:
                try:
                    print_exc(file=self.stderr)
                except UnicodeError:
                    # Best effort: the traceback is dropped when it cannot be
                    # written.
                    pass
                self.stderr.write("Message %s failed to import, skipping"
                      % unquote(message["Message-Id"]))
                continue
            # Fetch the stored email back to learn which thread it joined.
            email = Email.objects.get(
                mailinglist__name=self.list_address,
                message_id=get_message_id(message))
            ## Commit every time to be able to rollback on error
            #if not transaction.get_autocommit():
            #    transaction.commit()
            # Store the list of impacted threads to be able to compute the
            # thread_order and thread_depth values
            self.impacted_thread_ids.add(email.thread_id)
            progress_marker.count_imported += 1
        #self.store.search_index.flush() # Now commit to the search index
        progress_marker.finish()
Esempio n. 13
0
def attach_inline_image(message, content, filename=None, subtype=None, idstring="img", domain=None):
    """Attach ``content`` to ``message`` as an inline image.

    Returns the generated content id without the surrounding <...>, for use
    as the <img> tag src.
    """
    # Content ID per RFC 2045 section 7 (with <...>)
    bracketed_cid = make_msgid(idstring, domain)
    inline_part = MIMEImage(content, subtype)
    inline_part.add_header('Content-Disposition', 'inline', filename=filename)
    inline_part.add_header('Content-ID', bracketed_cid)
    message.attach(inline_part)
    bare_cid = unquote(bracketed_cid)
    return bare_cid
def doc_from_bytes(docid, rdkey, b):
    """Build a document dict from the raw text of an email message.

    The result holds 'multipart' (bool), 'headers' (lower-cased header name
    mapped to a list), '_attachments' (name mapped to whatever
    attach_from_msg returns) and, for multipart messages, 'multipart_info'
    (one dict per leaf part with its name, headers and content type).
    """
    msg = message_from_string(b)
    is_multipart = msg.is_multipart()
    headers = {}
    doc = {}
    doc['multipart'] = is_multipart
    doc['headers'] = headers
    # There is no opportunity to introduce an object which ignores the case
    # of headers, so every key is stored lower-cased.
    for header_name in msg.keys():
        values = msg.get_all(header_name)
        if not values:
            continue
        key = header_name.lower()
        # first do any charset etc conversion...
        values = [_safe_convert_header(value) for value in values]
        if key == 'references':
            # email.utils.unquote will do bad things to references headers
            # (stripping initial and trailing <>'s), so it is not used for
            # this header -- other fields seem ok.  The references are split
            # into a list here.
            headers[key] = [extract_message_ids(values[0])]
        else:
            headers[key] = [unquote(value) for value in values]
        # a sanity check and to help debug an obscure bug which seemed to
        # cause the wrong 'source' doc being passed!
        if __debug__ and rdkey[0] == 'email' and key == 'message-id':
            from raindrop.proto.imap import get_rdkey_for_email
            assert tuple(rdkey) == get_rdkey_for_email(values[0]), (rdkey, docid, values)

    # XXX - technically msg objects are recursive; handling that requires
    # more thought.  For now, assume they are flat.
    # Non-text parts must be returned in attachments, so *everything* is
    # returned in attachments.
    attachments = doc['_attachments'] = {}

    if not is_multipart:
        attachments['body'] = attach_from_msg((docid, 'body'), msg)
        return doc

    # A multi-part message - flatten it by walking the parts, looking only
    # at the 'leaf' nodes.  Attachments have lost their order; this list
    # keeps the order and is a convenient place to stash the other headers
    # coming with each part.
    parts_info = doc['multipart_info'] = []
    unnamed_index = 1
    for part in msg.walk():
        if part.is_multipart():
            continue
        name = sanitize_attach_name(part.get_filename())
        if not name:
            name = "subpart-%d" % unnamed_index
            unnamed_index += 1
        attachments[name] = attach_from_msg((docid, name), part)
        # Put together info about the attachment.
        part_headers = {}
        for part_header_name, part_header_value in part.items():
            part_headers[part_header_name.lower()] = _safe_convert_header(part_header_value)
        # content-type is redundant, but may be helpful...
        content_type = attachments[name]['content_type']
        parts_info.append(
            {'name': name, 'headers': part_headers, 'content_type': content_type})
    return doc
Esempio n. 15
0
    def test_email_from_html(self):
        """inline_images attaches a data-URI image and references it by cid."""
        from nimodipine.management.commands.send_messages import inline_images
        from django.core.mail import EmailMultiAlternatives

        html = 'some <b>html</b> and stuff <img src="data:image/png;base64,cafe"> ting'
        msg = inline_images(EmailMultiAlternatives(subject="foo"), html)

        first_attachment = msg.attachments[0]
        self.assertEqual(first_attachment.get_payload(), "cafe")

        # The HTML alternative must point at the attachment's content id
        cid = unquote(first_attachment.get("content-id"))
        expected_tag = '<img src="cid:{}">'.format(cid)
        self.assertIn(expected_tag, msg.alternatives[0][0])
Esempio n. 16
0
    def _get_date(self, message, header, report_name):
        """Return the value of ``header`` in ``message``.

        When reading the header raises TypeError or ValueError, None is
        returned instead, and the failure is written to ``self.stderr`` if
        ``self.verbose`` is set.
        """
        try:
            return message.get(header)
        except (TypeError, ValueError) as e:
            if self.verbose:
                message_id = unquote(message.get("message-id", 'n/a'))
                self.stderr.write(
                    "Can't get {} header in message {}{}: {}.".format(
                        header, message_id, report_name, e))
        return None
Esempio n. 17
0
def attach_inline_image(message, content, filename=None, subtype=None, idstring="img", domain=None):
    """Attach ``content`` to ``message`` as an inline image.

    Returns the generated content id without the surrounding <...>, for use
    as the <img> tag src.
    """
    # Avoid defaulting to hostname that might end in '.com', because some ESPs
    # use Content-ID as filename, and Gmail blocks filenames ending in '.com'.
    # 'inline' is a valid domain for a msgid and will never be a real TLD.
    msgid_domain = 'inline' if domain is None else domain
    # Content ID per RFC 2045 section 7 (with <...>)
    bracketed_cid = make_msgid(idstring, msgid_domain)
    inline_part = MIMEImage(content, subtype)
    inline_part.add_header('Content-Disposition', 'inline', filename=filename)
    inline_part.add_header('Content-ID', bracketed_cid)
    message.attach(inline_part)
    return unquote(bracketed_cid)
Esempio n. 18
0
    def _get_cookies(self):
        """
        Read the cookie string from environ and parse it into a dict of
        key/value pairs. The dict is built once and cached on the instance.
        """
        if hasattr(self, '_cookies'):
            return self._cookies
        parsed = {}
        raw_cookies = self._environ.get('HTTP_COOKIE')
        if raw_cookies:
            for item in raw_cookies.split(';'):
                name, separator, value = item.partition('=')
                # Skip items without '=' and items that start with '='
                if separator and name:
                    parsed[name.strip()] = eutils.unquote(value)
        self._cookies = parsed
        return self._cookies
Esempio n. 19
0
def get_message_message_id(message_id_str: str) -> str:
    """
    Return the message-id of a message as a string.

    An empty or missing value is replaced by a fresh id from make_msgid().
    The value is always passed through unquote() before being returned.

    :param message_id_str: the message 'message id' header as a string
    :return: parsed or generated message id
    """
    # Fall back to a generated id when none was supplied
    raw_message_id = message_id_str if message_id_str else make_msgid()
    return unquote(raw_message_id)
Esempio n. 20
0
def inline_images(message, html):
    """Turn inline PNG data images in ``html`` into attachments.

    Every matching <img> tag is attached to ``message`` as an inline image
    and replaced in the HTML by a tag that references the attachment by
    content id. The resulting HTML is added as a "text/html" alternative
    and ``message`` is returned.
    """
    tag_pattern = r'<img.*?src="data:image/png;base64,.*?">'
    data_pattern = r'<img.*?src="data:image/png;base64,(.*?)">'
    for index, image_tag in enumerate(re.findall(tag_pattern, html)):
        data = re.search(data_pattern, image_tag).group(1)
        # Content ID per RFC 2045 section 7 (with <...>)
        content_id = make_msgid("img")
        # The data is already base64 text, so the encoder leaves it untouched
        part = MIMEImage(data, "png", _encoder=lambda x: x)
        part.add_header(
            "Content-Disposition", "inline",
            filename="img{}.png".format(index))
        part.add_header("Content-ID", content_id)
        part.add_header("Content-Transfer-Encoding", "base64")
        message.attach(part)
        cid_tag = '<img src="cid:{}">'.format(unquote(content_id))
        html = html.replace(image_tag, cid_tag)
    message.attach_alternative(html, "text/html")
    return message
Esempio n. 21
0
 def _is_too_old(self, message, report_name):
     if not self.since:
         return False
     date = message.get("date")
     if not date:
         return False
     try:
         date = parse_date(date)
     except ValueError as e:
         if self.verbose:
             self.stderr.write(
                 "Can't parse date string in message {}{}: {}. "
                 "The date string is: '{}'".format(
                     unquote(message.get("message-id", 'n/a')),
                     report_name, e, date.decode("ascii", "replace")))
         return False
     if date.tzinfo is None:
         date = date.replace(tzinfo=utc)
     try:
         return date <= self.since
     except ValueError:
         return False
Esempio n. 22
0
    def post(self, request):
        """Validate and save the user and profile forms posted by the user.

        When both forms are valid and another user already has the e-mail
        address, the page is rendered again with an error message and
        nothing is saved. Otherwise the forms are saved (when valid) and the
        page is rendered with both forms.
        """
        user_form = self.user_update_form_class(data=request.POST, instance=request.user)
        profile_form = self.profile_form_class(
            data=request.POST, files=request.FILES, instance=request.user.profile)
        context = {'user_form': user_form, 'profile_form': profile_form}

        if user_form.is_valid() and profile_form.is_valid():
            update = user_form.save(commit=False)
            update.user = request.user
            update.user.username = request.user.username
            matches = User.objects.filter(email=unquote(request.user.email))

            # A match that is not the current user means the address is taken
            if matches and matches[0].id != request.user.id:
                error_context = dict(
                    context,
                    error_message='This email address is already in use. Please supply a different email address.')
                return render(request, self.template_name, error_context)

            update.save()
            profile_form.save()

        return render(request, self.template_name, context)
Esempio n. 23
0
def _unquotevalue(value):
    """Unquote ``value``; for a tuple, unquote only its third item."""
    if not isinstance(value, tuple):
        return utils.unquote(value)
    # Rebuild the three-item tuple with the last item unquoted
    items = list(value[:3])
    items[2] = utils.unquote(items[2])
    return tuple(items)
Esempio n. 24
0
def test_generate_header(key, val):
    """Check that a generated header parses back to the values it was built from."""
    header = generate_header(key, {key: val})
    k, params = parse_header(header)
    # Compare both parts as one tuple.  Written as `assert a, b == c`, the
    # comparison becomes the assertion *message* and only `a` is checked.
    assert (unquote(k), params) == (key, {key: val})
Esempio n. 25
0
 def inline_attachments(self):
     """dict of Content-ID: attachment (as MIMEPart objects)

     Only parts that report themselves as inline attachments and carry a
     non-empty Content-ID header are included; keys are the unquoted IDs.
     """
     by_cid = {}
     for part in self.walk():
         if not part.is_inline_attachment():
             continue
         content_id = part['Content-ID']
         if content_id:
             by_cid[unquote(content_id)] = part
     return by_cid
Esempio n. 26
0
    def add_to_list(self, mlist, message):
        """Add the message to a specific list of the store.

        The list row is created on first use and its display name and
        subject prefix are refreshed from ``mlist``.  The message is then
        stored together with its full source text, attached to an existing
        or new thread, its attachments are saved and, when a search index
        is configured, the email is indexed.

        :param mlist: The mailing-list object; this method reads its
            ``fqdn_listname``, ``display_name`` and ``subject_prefix``
            attributes.
        :param message: The message to store.  It must carry a Message-Id
            header.  Scrubbing modifies it in place.
        :returns: ``email.message_id_hash`` of the stored message, or of
            the would-be duplicate when the message is already in the list.
        :raises ValueError: if the message is missing a Message-Id header.
        """
        list_name = unicode(mlist.fqdn_listname)
        # Create the list if it does not exist
        l = self.db.find(List, List.name == list_name).one()
        if l is None:
            l = List(list_name)
            self.db.add(l)
        # Keep the stored list properties in sync with the mailing-list
        l.display_name = mlist.display_name
        l.subject_prefix = mlist.subject_prefix
        if not message.has_key("Message-Id"):
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = unicode(unquote(message['Message-Id']))
        email = Email(list_name, msg_id)
        # Duplicates are reported and skipped, nothing is stored
        if self.is_message_in_list(list_name, email.message_id):
            print("Duplicate email from %s: %s" %
                  (message['From'], message.get('Subject', '""')))
            return email.message_id_hash

        # the message.as_string() call must be done before scrubbing
        email_full = EmailFull(list_name, msg_id, message.as_string())
        # Find thread id
        new_thread = False
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            new_thread = True
            # make up the thread_id if not found
            thread_id = email.message_id_hash
        email.thread_id = thread_id
        email.in_reply_to = ref

        from_name, from_email = parseaddr(message['From'])
        from_name = header_to_unicode(from_name)
        email.sender_name = from_name.strip()
        email.sender_email = unicode(from_email).strip()
        email.subject = header_to_unicode(message.get('Subject'))
        msg_date = parsedate(message.get("Date"))
        if msg_date is None:
            # Absent or unparseable date
            msg_date = datetime.datetime.utcnow()
        # The offset must be read before the tzinfo is stripped below
        utcoffset = msg_date.utcoffset()
        if msg_date.tzinfo is not None:
            # Store the date as a naive datetime expressed in UTC
            msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
        email.date = msg_date
        if utcoffset is None:
            email.timezone = 0
        else:
            # in minutes
            email.timezone = (
                (utcoffset.days * 24 * 60 * 60) + utcoffset.seconds) / 60

        scrubber = Scrubber(list_name, message)
        # warning: scrubbing modifies the msg in-place
        email.content, attachments = scrubber.scrub()

        # store the Mailman user
        email.user_id = self._store_mailman_user(email.sender_email)

        #category = 'Question' # TODO: enum + i18n ?
        #if ('agenda' in message.get('Subject', '').lower() or
        #        'reminder' in message.get('Subject', '').lower()):
        #    # i18n!
        #    category = 'Agenda'

        if new_thread:
            thread = Thread(list_name, thread_id, email.date)
        else:
            thread = self.db.find(
                Thread,
                And(
                    Thread.list_name == list_name,
                    Thread.thread_id == thread_id,
                )).one()
        # The newest message determines the thread's activity date
        thread.date_active = email.date
        self.db.add(thread)

        self.db.add(email)
        self.db.add(email_full)
        compute_thread_order_and_depth(thread)
        # Each attachment tuple is expanded into add_attachment's arguments
        for attachment in attachments:
            self.add_attachment(list_name, msg_id, *attachment)
        self.flush()
        # search indexing
        if self.search_index is not None:
            self.search_index.add(email)
        return email.message_id_hash
Esempio n. 27
0
  def process_headers( self, msg ):
    """Build a dictionary of normalised header data for a message.

    Every header value is converted with ``self._safe_convert_header`` and
    unquoted.  The returned dict may contain:

    * ``receivedDate`` - the current UTC time as an ISO string, replaced by
      a dict (``original``, ``utctimestamp``, ``utcisoformat``) for the most
      recent parseable ``Received`` header;
    * one list of ``{"name", "address"}`` dicts per address header;
    * ``mid`` and ``subject`` - first value of the respective header;
    * ``date`` - dict with ``original``, ``utctimestamp``, ``utcisoformat``.

    ``Received`` headers without a ``;`` separator and ``Received``/``Date``
    values that cannot be parsed are skipped instead of raising.

    :param msg: message object providing ``keys()`` and ``get_all()``.
    :returns: the dictionary described above.
    """
    headers = {}
    # for now we just take today's date as the received date
    message = { "receivedDate" : datetime.datetime.utcnow().isoformat() }

    for hn in msg.keys():
      header_values = msg.get_all(hn)
      if header_values:
        header_name = hn.lower()
        # add this header to the list of available headers
        headers[header_name] = []

        # do any charset etc conversion on the values...
        header_values = [self._safe_convert_header(v) for v in header_values]

        # go through the values converting them into usable lists
        for value in header_values:
          if re.match(r"<.+>,",value):
            for v in value.split(","):
              headers[header_name].append(unquote(v.strip()))
          # multiple reference processing
          elif header_name == "references" and re.match(r"<[^<>]+>\s+",value):
            for ref in re.findall(r"<[^<>]+>",value):
              headers[header_name].append(unquote(ref.strip()))
          else:
            headers[header_name].append(unquote(value.strip()))

    for header_name in headers:
      header_values = headers[header_name]
      # NOTE(review): names are lower-cased above, so a "Reply-To" header
      # arrives here as "reply-to" and never matches "replyto" - confirm
      # which key consumers of this dict expect before changing it.
      if header_name in ["to","cc", "bcc", "from", "replyto"]:
        message[header_name] = [{ "name" : name, "address" : address}
                                for name, address
                                in getaddresses(header_values)
                                if address]
      elif header_name == "received":
        # keep the most recent timestamp found in the Received headers
        latest = 0
        for v in header_values:
          # the date is whatever follows the last ';' of the header
          found = re.match(r".*;\s*(.+)",v,re.DOTALL)
          parsed = parsedate_tz(found.group(1)) if found else None
          if parsed is None:
            # malformed header: no date part, or one that cannot be parsed
            continue
          stamp = int(mktime_tz(parsed))
          if stamp > latest:
            latest = stamp
            message["receivedDate"] = { "original" : formatdate(stamp),
                                        "utctimestamp" : stamp,
                                        "utcisoformat" : datetime.datetime.fromtimestamp(stamp, tzutc()).isoformat() }

      elif header_name in ["message-id"]:
        # single value header
        message["mid"] = header_values[0]

      elif header_name in ["subject"]:
        # single value header
        message["subject"] = header_values[0]

      elif header_name in ["date"]:
        # single value header
        value = header_values[0]
        parsed = parsedate_tz(value)
        if parsed is not None:
          utctimestamp = int(mktime_tz(parsed))
          timestamp = datetime.datetime.fromtimestamp(utctimestamp, tzutc())
          message["date"] = { "original" : value,
                              "utctimestamp" : utctimestamp,
                              "utcisoformat" : timestamp.isoformat() }

    return message
Esempio n. 28
0
            try:
                self.store.add_to_list(self.mlist, message)
            except ValueError, e:
                if len(e.args) != 2:
                    raise # Regular ValueError exception
                try:
                    print "%s from %s about %s" % (e.args[0],
                            e.args[1].get("From"), e.args[1].get("Subject"))
                except UnicodeDecodeError:
                    print "%s with message-id %s" % (
                            e.args[0], e.args[1].get("Message-ID"))
                continue
            except DatabaseError:
                print_exc()
                print ("Message %s failed to import, skipping"
                       % unquote(message["Message-Id"]))
                self.store.rollback()
                continue
            # And insert the attachments
            for counter, att in enumerate(attachments):
                self.store.add_attachment(
                        self.mlist.fqdn_listname,
                        message["Message-Id"].strip(" <>"),
                        counter, att[0], att[1], None, att[2])

            self.store.flush()
            cnt_imported += 1
            # Commit every time to be able to rollback on error
            self.store.commit()
        self.store.search_index.flush() # Now commit to the search index
        if self.verbose:
Esempio n. 29
0
    def add_to_list(self, mlist, message):
        """Add the message to a specific list of the store.

        The list row is created on first use and its display name is
        refreshed from ``mlist``.  The message is then stored together with
        its full source text, attached to an existing or new thread, and its
        attachments are saved.

        :param mlist: The mailing-list object; this method reads its
            ``fqdn_listname`` and ``display_name`` attributes.
        :param message: The message to store.  It must carry a Message-Id
            header.  Scrubbing modifies it in place.
        :returns: ``email.message_id_hash`` of the stored message, or of
            the would-be duplicate when the message is already in the list.
        :raises ValueError: if the message is missing a Message-Id header.
        """
        list_name = unicode(mlist.fqdn_listname)
        # Create the list if it does not exist
        l = self.db.find(List, List.name == list_name).one()
        if l is None:
            l = List(list_name)
            self.db.add(l)
        l.display_name = mlist.display_name
        if not message.has_key("Message-Id"):
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = unicode(unquote(message['Message-Id']))
        email = Email(list_name, msg_id)
        # Duplicates are reported and skipped, nothing is stored
        if self.is_message_in_list(list_name, email.message_id):
            print ("Duplicate email from %s: %s" %
                   (message['From'], message.get('Subject', '""')))
            return email.message_id_hash

        # the message.as_string() call must be done before scrubbing
        email_full = EmailFull(list_name, msg_id, message.as_string())
        # Find thread id
        new_thread = False
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            new_thread = True
            # make up the thread_id if not found
            thread_id = email.message_id_hash
        email.thread_id = thread_id
        email.in_reply_to = ref

        from_name, from_email = parseaddr(message['From'])
        from_name = header_to_unicode(from_name)
        email.sender_name = from_name.strip()
        email.sender_email = unicode(from_email).strip()
        email.subject = header_to_unicode(message.get('Subject'))
        msg_date = parsedate(message.get("Date"))
        if msg_date is None:
            # Absent or unparseable date.  Dates are stored as naive UTC
            # (see the conversion below), so fall back to the current UTC
            # time rather than the local time.
            msg_date = datetime.datetime.utcnow()
        # The offset must be read *before* the tzinfo is stripped: once the
        # date is naive, utcoffset() is always None and the sender's
        # timezone would be recorded as 0 for every message.
        utcoffset = msg_date.utcoffset()
        if msg_date.tzinfo is not None:
            msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
        email.date = msg_date
        if utcoffset is None:
            email.timezone = 0
        else:
            # in minutes
            email.timezone = ( (utcoffset.days * 24 * 60 * 60)
                               + utcoffset.seconds) / 60

        scrubber = Scrubber(list_name, message)
        # warning: scrubbing modifies the msg in-place
        email.content, attachments = scrubber.scrub()

        #category = 'Question' # TODO: enum + i18n ?
        #if ('agenda' in message.get('Subject', '').lower() or
        #        'reminder' in message.get('Subject', '').lower()):
        #    # i18n!
        #    category = 'Agenda'

        if new_thread:
            thread = Thread(list_name, thread_id, email.date)
        else:
            thread = self.db.find(Thread, And(
                            Thread.list_name == list_name,
                            Thread.thread_id == thread_id,
                            )).one()
        # The newest message determines the thread's activity date
        thread.date_active = email.date
        self.db.add(thread)

        self.db.add(email)
        self.db.add(email_full)
        self.flush()
        # Each attachment tuple is expanded into add_attachment's arguments
        for attachment in attachments:
            self.add_attachment(list_name, msg_id, *attachment)
        return email.message_id_hash
 def test_cid_in_message(self):
     """The first alternative body must reference the inline image by its cid."""
     body = self.message.alternatives[0][0]
     expected_reference = 'cid:%s' % unquote(self.inline_image._content_id)
     self.assertIn(expected_reference, body)
Esempio n. 31
0
def _unquotevalue(value):
    if isinstance(value, tuple):
        return (value[0], value[1], utils.unquote(value[2]))
    else:
        return utils.unquote(value)
Esempio n. 32
0
 def _unquote_boundary(self, b):
     return b[:2] + email_utils.unquote(
         b[2:-2].decode('ascii')).encode('ascii') + b[-2:]
Esempio n. 33
0
# Group name (slug) -> human-readable title for every group to create.
groups = {
    'transporte': 'Transporte',
    'turismo': 'Turismo',
    'vivienda': 'Vivienda',
    'cultura': 'Cultura',
    'deporte': 'Deporte',
    'desarrollo-social': 'Desarrollo Social',
    'economia': 'Economía',
    'educacion': 'Educación',
    'industria': 'Industria',
    'infraestructura': 'Infraestructura',
    'medio-ambiente': 'Medio Ambiente',
    'salud': 'Salud',
    'seguridad': 'Seguridad',
    'trabajo': 'Trabajo'
}

# Post one group registration per entry through the `ckan` client.
for name, title in groups.iteritems():
    ckan.group_register_post({'name': name, 'title': title})

# Associate datasets with groups based on the "category" custom field.
for package_id in ckan.package_register_get():
    category = ckan.package_entity_get(package_id)['extras'].get('category')
    if not category:
        # No category on this package: skip it before unquote() is called,
        # since unquote() cannot handle a missing (None) value.
        continue
    group_name = unquote(category)
    if group_name:
        # Group names use dashes where the category field uses underscores.
        group_name_to_get = group_name.replace('_', '-')
        group = ckan.group_entity_get(group_name_to_get)
        if package_id not in group['packages']:
            group['packages'].append(package_id)
            ckan.group_entity_put(group)
Esempio n. 34
0
 def __str__(self):
     if not self._content_id:
         self.generate_cid()
     return 'cid:' + unquote(self._content_id)
Esempio n. 35
0
    def add_to_list(self, mlist, message):
        """Add the message to a specific list of the store.

        The list row is created on first use.  Unless archiving is disabled
        for the list, the message is stored, attached to an existing or new
        thread, its attachments are saved, ``NewMessage`` (and, for a new
        thread, ``NewThread``) events are emitted and, when a search index
        is configured, the email is indexed.

        :param mlist: The mailing-list object; this method reads its
            ``fqdn_listname`` and ``archive_policy`` attributes plus every
            attribute named in the list row's ``mailman_props``.
        :param message: The message to store.  It must carry a Message-Id
            header.  Scrubbing modifies it in place.
        :returns: ``email.message_id_hash`` of the stored (or duplicate)
            message, or ``None`` when the list's archive policy is
            ``ArchivePolicy.never``.
        :raises ValueError: if the message has no Message-Id header, or if
            decoding the sender name/address raises a unicode error.
        """
        list_name = unicode(mlist.fqdn_listname)
        # Create the list if it does not exist
        l = self.db.find(List, List.name == list_name).one()
        if l is None:
            l = List(list_name)
            # Don't wait for the cache to set those properties
            for propname in l.mailman_props:
                setattr(l, propname, getattr(mlist, propname))
            self.db.add(l)
        # Note: the list row above is created even when archiving is disabled
        if mlist.archive_policy == ArchivePolicy.never:
            logger.info("Archiving disabled by list policy for %s" % list_name)
            return None
        if not message.has_key("Message-Id"):
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = unicode(unquote(message["Message-Id"]))
        # Protect against extremely long Message-Ids (there is no limit in the
        # email spec), it's set to VARCHAR(255) in the database
        if len(msg_id) >= 255:
            msg_id = msg_id[:254]
        email = Email(list_name, msg_id)
        # Duplicates are logged and skipped, nothing is stored
        if self.is_message_in_list(list_name, email.message_id):
            logger.info("Duplicate email from %s: %s" % (message["From"], message.get("Subject", '""')))
            return email.message_id_hash

        # if not getattr(settings.KITTYSTORE_FULL_EMAIL):
        #    # If it's a valid value, leave it to the "prototype" archiver
        #    # Note: the message.as_string() call must be done before scrubbing
        #    email_full = EmailFull(list_name, msg_id, message.as_string())
        #    self.db.add(email_full)

        # Find thread id
        new_thread = False
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            new_thread = True
            # make up the thread_id if not found
            thread_id = email.message_id_hash
        email.thread_id = thread_id
        email.in_reply_to = ref

        try:
            from_name, from_email = parseaddr(message["From"])
            from_name = header_to_unicode(from_name).strip()
            email.sender_email = unicode(from_email).strip()
        except (UnicodeDecodeError, UnicodeEncodeError):
            raise ValueError("Non-ascii sender address", message)
        # Senders are stored once per address; reuse the row when it exists
        sender = self.db.find(Sender, Sender.email == email.sender_email).one()
        if sender is None:
            sender = Sender(email.sender_email, from_name)
            self.db.add(sender)
        else:
            sender.name = from_name  # update the name if needed
        email.subject = header_to_unicode(message.get("Subject"))
        if email.subject is not None:
            # limit subject size to 2000 chars or PostgreSQL may complain
            email.subject = email.subject[:2000]
        msg_date = parsedate(message.get("Date"))
        if msg_date is None:
            # Absent or unparseable date
            msg_date = datetime.datetime.utcnow()
        # The offset must be read before the tzinfo is stripped below
        utcoffset = msg_date.utcoffset()
        if msg_date.tzinfo is not None:
            # Store the date as a naive datetime expressed in UTC
            msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
        email.date = msg_date
        if utcoffset is None:
            email.timezone = 0
        else:
            # in minutes
            email.timezone = ((utcoffset.days * 24 * 60 * 60) + utcoffset.seconds) / 60

        scrubber = Scrubber(list_name, message)
        # warning: scrubbing modifies the msg in-place
        email.content, attachments = scrubber.scrub()

        # category = 'Question' # TODO: enum + i18n ?
        # if ('agenda' in message.get('Subject', '').lower() or
        #        'reminder' in message.get('Subject', '').lower()):
        #    # i18n!
        #    category = 'Agenda'

        if new_thread:
            thread = Thread(list_name, thread_id, email.date)
        else:
            thread = self.db.find(Thread, And(Thread.list_name == list_name, Thread.thread_id == thread_id)).one()
        # The newest message determines the thread's activity date
        thread.date_active = email.date
        self.db.add(thread)

        self.db.add(email)
        compute_thread_order_and_depth(thread)
        # Each attachment tuple is expanded into add_attachment's arguments
        for attachment in attachments:
            self.add_attachment(list_name, msg_id, *attachment)
        self.flush()
        # invalidate the cache
        events.notify(events.NewMessage(self, mlist, email))
        if new_thread:
            events.notify(events.NewThread(self, mlist, thread))
        # search indexing
        # do it after caching because we need some list properties (like
        # archive_policy)
        if self.search_index is not None:
            self.search_index.add(email)

        return email.message_id_hash
Esempio n. 36
0
    def from_mbox(self, mbfile):
        """
        Insert all the emails contained in an mbox file into the database.

        Each message is re-parsed with the default email policy, given a
        usable ``Date`` header when one can be derived, skipped when
        ``self._is_too_old`` says so, and then added to
        ``self.list_address`` inside its own transaction.  Failures on a
        single message are reported on ``self.stderr`` and the import
        continues with the next message.  The thread id of every imported
        email is recorded in ``self.impacted_thread_ids``.

        :arg mbfile: a mailbox file
        """
        mbox = mailbox.mbox(mbfile)
        progress_marker = ProgressMarker(self.verbose, self.stdout)
        if not self.since:
            progress_marker.total = len(mbox)
        for msg in mbox:
            # FIXME: this converts mailbox.mboxMessage to
            # email.message.EmailMessage
            msg_raw = msg.as_bytes(unixfrom=False)
            unixfrom = msg.get_from()
            message = message_from_bytes(msg_raw, policy=policy.default)
            # Fix missing and weird Date: headers.
            date = (self._get_date(message, "date")
                    or self._get_date(message, "resent-date"))
            if unixfrom and not date:
                # Fall back to the date part of the mbox "From " line
                date = " ".join(unixfrom.split()[1:])

            if date:
                # Make sure this date can be parsed before setting it as as the
                # header. If not, a TypeError is raised and we just keep the
                # old Header.
                with suppress(TypeError):
                    del message['Date']
                    message['Date'] = date

            if self._is_too_old(message):
                continue
            progress_marker.tick(message["Message-Id"])
            # Un-wrap the subject line if necessary
            if message["subject"]:
                message.replace_header(
                    "subject", TEXTWRAP_RE.sub(" ", message["subject"]))
            if unixfrom:
                message.set_unixfrom(unixfrom)
            if message['message-id'] is None:
                message['Message-ID'] = make_msgid('generated')
            # Now insert the message
            try:
                with transaction.atomic():
                    add_to_list(self.list_address, message)
            except DuplicateMessage as e:
                if self.verbose:
                    self.stderr.write("Duplicate email with message-id '%s'" %
                                      e.args[0])
                continue
            except (LookupError, UnicodeError, ValueError) as e:
                self.stderr.write("Failed adding message %s: %s" %
                                  (message.get("Message-ID"), e))
                if len(e.args) == 2:
                    try:
                        self.stderr.write("%s from %s about %s" %
                                          (e.args[0], e.args[1].get("From"),
                                           e.args[1].get("Subject")))
                    except UnicodeDecodeError:
                        pass
                # Don't reraise the exception
                continue
            except DatabaseError:
                try:
                    print_exc(file=self.stderr)
                except UnicodeError:
                    pass
                self.stderr.write("Message %s failed to import, skipping" %
                                  unquote(message["Message-Id"]))
                continue
            except Exception as e:
                # In case of *any* exception, log and continue to import the
                # rest of the archive.
                self.stderr.write(
                    "Message {} failed to import, skipping".format(
                        unquote(message["Message-ID"])))
                # write() needs text: passing the exception object itself
                # raises inside this handler and aborts the whole import.
                self.stderr.write(str(e))
                continue
            email = Email.objects.get(mailinglist__name=self.list_address,
                                      message_id=get_message_id(message))
            # # Commit every time to be able to rollback on error
            # if not transaction.get_autocommit():
            #     transaction.commit()
            # Store the list of impacted threads to be able to compute the
            # thread_order and thread_depth values
            self.impacted_thread_ids.add(email.thread_id)
            progress_marker.count_imported += 1
        # self.store.search_index.flush() # Now commit to the search index
        progress_marker.finish()
        mbox.close()
Esempio n. 37
0
    def try_download_link(self,
                          add_token: bool = False,
                          delete_if_successful: bool = False,
                          use_cookies: bool = False) -> bool:
        """This function should only be used for shortcut/URL files.
        It tests whether a URL refers to a file, that is not an HTML web page.
        Then downloads it. Otherwise an attempt will be made to download an HTML video
        from the website.

        A HEAD request is sent first to learn the content type, size,
        final URL and file name. For HTML/plain-text targets youtube-dl is
        tried first; if it does not claim the download, the URL is
        retrieved directly.

        Args:
            add_token (bool, optional): Adds the ws-token to the url. Defaults to False.
            delete_if_successful (bool, optional): Deletes the tmp file (self.file.saved_to)
                before the download is started. Defaults to False.
            use_cookies (bool, optional): Requires the cookies file to exist. Defaults to False.

        Returns:
            bool: True when the file was downloaded, and also when the URL
                answered with an HTTP error (the attempt is then given up
                with self.success set to True). False when the URL has no
                valid format/scheme.

        Raises:
            ValueError: If use_cookies is set but the cookies file is missing.
            RuntimeError: If youtube-dl raised an error while downloading.
        """

        url_to_download = self.file.content_fileurl
        logging.debug('T%s - Try to download linked file %s', self.thread_id,
                      url_to_download)

        if add_token:
            url_to_download = self._add_token_to_url(self.file.content_fileurl)

        cookies_path = self.options.get('cookies_path', None)
        if use_cookies:
            if cookies_path is None or not os.path.isfile(cookies_path):
                self.success = False
                raise ValueError(
                    'Moodle Cookies are missing. Run `moodle-dl -nt` to set a privatetoken for cookie generation (If necessary additionally `-sso`)'
                )

        if delete_if_successful:
            # if temporary file is not needed delete it as soon as possible
            try:
                os.remove(self.file.saved_to)
            except Exception as e:
                logging.warning(
                    'T%s - Could not delete %s before download is started. Error: %s',
                    self.thread_id,
                    self.file.saved_to,
                    e,
                )

        isHTML = False
        new_filename = ""
        total_bytes_estimate = -1
        session = requests.Session()

        # Cookies are attached whenever a path is configured, even if
        # use_cookies is False; they are only loaded if the file exists.
        if cookies_path is not None:
            session.cookies = MozillaCookieJar(cookies_path)
            if os.path.isfile(cookies_path):
                session.cookies.load(ignore_discard=True, ignore_expires=True)

        # Probe the URL with a HEAD request before downloading anything.
        try:
            response = session.head(
                url_to_download,
                headers=RequestHelper.stdHeader,
                verify=self.verify_cert,
                allow_redirects=True,
            )
        except (InvalidSchema, InvalidURL, MissingSchema):
            # don't download urls that are not web links, e.g. 'mailto:' addresses
            logging.debug(
                'T%s - Attempt is aborted because the URL has no correct format',
                self.thread_id)
            self.success = True
            return False

        if not response.ok:
            # The URL reports an HTTP error, so we give up trying to download the URL.
            logging.warning(
                'T%s - Stopping the attemp to download %s because of the HTTP ERROR %s',
                self.thread_id,
                self.file.content_fileurl,
                response.status_code,
            )
            self.success = True
            return True

        # A missing Content-Type header is treated as HTML.
        content_type = response.headers.get('Content-Type',
                                            'text/html').split(';')[0]
        if content_type == 'text/html' or content_type == 'text/plain':
            isHTML = True

        total_bytes_estimate = int(response.headers.get('Content-Length', -1))
        last_modified = response.headers.get('Last-Modified', None)

        # Continue with the final URL when the request was redirected.
        if response.url != url_to_download:
            if response.history and len(response.history) > 0:
                logging.debug('T%s - URL was %s time(s) redirected',
                              self.thread_id, len(response.history))
            else:
                logging.debug(
                    'T%s - URL has changed after information retrieval',
                    self.thread_id)
            url_to_download = response.url

        # Default file name: last path segment of the URL ...
        url_parsed = urlparse.urlparse(url_to_download)
        new_filename = posixpath.basename(url_parsed.path)

        # ... overridden by the Content-Disposition file name if present.
        if "Content-Disposition" in response.headers.keys():
            found_names = re.findall("filename=(.+)",
                                     response.headers["Content-Disposition"])
            if len(found_names) > 0:
                new_filename = unquote(found_names[0])

        if isHTML and not self.is_blocked_for_youtube_dl(url_to_download):

            filename_tmpl = self.filename + ' | %(title)s (%(id)s).%(ext)s'
            if self.file.content_type == 'description-url':
                filename_tmpl = '%(title)s (%(id)s).%(ext)s'
            outtmpl = str(Path(self.destination) / filename_tmpl)

            ydl_opts = {
                'logger': self.YtLogger(self),
                'progress_hooks': [self.yt_hook],
                'outtmpl': outtmpl,
                'nocheckcertificate': self.skip_cert_verify,
                'retries': 10,
                'fragment_retries': 10,
                'ignoreerrors': True,
                'addmetadata': True,
            }

            # User supplied options take precedence over the defaults above.
            youtube_dl_options = self.options.get('youtube_dl_options', {})
            ydl_opts.update(youtube_dl_options)

            if cookies_path is not None and os.path.isfile(cookies_path):
                ydl_opts.update({'cookiefile': cookies_path})

            ydl = youtube_dl.YoutubeDL(ydl_opts)
            add_additional_extractors(ydl)
            try:
                ydl_results = ydl.download([url_to_download])
                if ydl_results == 1:
                    # Result code 1: fall through to the direct download below.
                    pass
                elif self.file.module_name != 'index_mod-page':
                    self.file.saved_to = str(
                        Path(self.destination) / self.filename)
                    self.file.time_stamp = int(time.time())

                    self.success = True
                    return True
            except Exception as e:
                logging.error(
                    'T%s - Youtube-dl failed! Error: %s',
                    self.thread_id,
                    e,
                )
                self.youtube_dl_failed_with_error = True

            # if we want we could save ydl.cookiejar (Also the cookiejar of moodle-dl)

            if self.youtube_dl_failed_with_error is True:
                if not delete_if_successful:
                    # cleanup the url-link file
                    try:
                        os.remove(self.file.saved_to)
                    except Exception as e:
                        logging.warning(
                            'T%s - Could not delete %s after youtube-dl failed. Error: %s',
                            self.thread_id,
                            self.file.saved_to,
                            e,
                        )
                self.success = False
                raise RuntimeError(
                    'Youtube-dl could not download the URL. For details see youtube-dl error messages in the log file'
                )

        logging.debug('T%s - Downloading file directly', self.thread_id)

        # generate file extension for modules names
        new_name, new_extension = os.path.splitext(new_filename)
        if new_extension == '' and isHTML:
            new_extension = '.html'

        if self.file.content_type == 'description-url' and new_name != '':
            self.filename = new_name + new_extension

        old_name, old_extension = os.path.splitext(self.filename)

        # Append the detected extension when the current name lacks it.
        if old_extension != new_extension:
            self.filename = self.filename + new_extension

        self.set_path(True)

        if total_bytes_estimate != -1:
            self.thread_report[
                self.thread_id]['extra_totalsize'] = total_bytes_estimate

        self.urlretrieve(
            url_to_download,
            self.file.saved_to,
            context=self.ssl_context,
            reporthook=self.add_progress,
            cookies_path=cookies_path,
        )

        self.set_utime(last_modified)
        self.file.time_stamp = int(time.time())

        self.success = True
        return True
 def test_cid_in_message(self):
     """The first alternative body must contain the inline image's ``cid:`` reference."""
     first_alternative = self.message.alternatives[0][0]
     cid_reference = 'cid:%s' % unquote(self.inline_image._content_id)
     self.assertIn(cid_reference, first_alternative)