def maildir_source(self):
    """Yield filtered text from every plain-text part of a maildir.

    Opens the existing maildir at ``self._infile`` (it is never created),
    switches to the sub-folder ``self._folder`` when one is set, and for
    each message decodes every part returned by
    ``self.get_plaintext_parts`` before passing the text to
    ``self.filter_text``.

    The charset declared by a part is used when Python knows it as a text
    encoding; otherwise latin1 is used. Undecodable bytes are dropped.

    Yields:
        Whatever ``self.filter_text`` yields for each decoded part.
    """
    from mailbox import Maildir
    import codecs

    fallback = 'latin1'

    def get_encoding(enc, default=fallback):
        """Return *enc* if *enc* is a known codec name, *default* otherwise."""
        if not enc:
            return default
        try:
            codecs.lookup(enc)
        except LookupError:
            return default
        return enc

    box = Maildir(self._infile, create=False)
    if self._folder:
        box = box.get_folder(self._folder)

    for _, msg in box.iteritems():
        for part in self.get_plaintext_parts(msg):
            content_bytes = part.get_payload(decode=True)
            if content_bytes is None:
                # get_payload(decode=True) returns None for multipart
                # containers: there is nothing to decode.
                continue
            enc = get_encoding(part.get_content_charset())
            try:
                content = content_bytes.decode(encoding=enc, errors='ignore')
            except LookupError:
                # codecs.lookup() also accepts non-text codecs (base64,
                # rot13, hex, ...) which bytes.decode() refuses.
                content = content_bytes.decode(
                    encoding=fallback, errors='ignore')
            yield from self.filter_text(content)
def maildir_source(self):
    """Yield filtered text from every plain-text part of a maildir.

    The maildir at ``self._infile`` must already exist (``create=False``).
    When ``self._folder`` is set, that sub-folder is read instead of the
    top-level mailbox. Each part returned by ``self.get_plaintext_parts``
    is decoded and the resulting text is passed to ``self.filter_text``.

    The charset declared by a part is used when Python knows it as a text
    encoding; otherwise latin1 is used. Undecodable bytes are dropped.

    Yields:
        Whatever ``self.filter_text`` yields for each decoded part.
    """
    from mailbox import Maildir
    import codecs

    fallback = 'latin1'

    def get_encoding(enc, default=fallback):
        """Return *enc* if *enc* is a known codec name, *default* otherwise."""
        if not enc:
            return default
        try:
            codecs.lookup(enc)
        except LookupError:
            return default
        return enc

    mailbox_ = Maildir(self._infile, create=False)
    if self._folder:
        mailbox_ = mailbox_.get_folder(self._folder)

    for _, msg in mailbox_.iteritems():
        for part in self.get_plaintext_parts(msg):
            content_bytes = part.get_payload(decode=True)
            if content_bytes is None:
                # get_payload(decode=True) returns None for multipart
                # containers: there is nothing to decode.
                continue
            enc = get_encoding(part.get_content_charset())
            try:
                content = content_bytes.decode(encoding=enc, errors='ignore')
            except LookupError:
                # A codec can be registered without being a text encoding
                # (base64, rot13, hex, ...); bytes.decode() rejects those.
                content = content_bytes.decode(
                    encoding=fallback, errors='ignore')
            yield from self.filter_text(content)
def import_email(email, import_path, format, **kwargs):
    """Import mails from disk and deliver them to the user *email*.

    The source is chosen from *format* and *import_path*:

    * ``format == 'maildir'``: *import_path* is opened as a maildir;
    * otherwise, when *import_path* is a directory, every regular file in
      it is parsed as one message;
    * otherwise *import_path* is opened as a single mbox file.

    All messages are wrapped in ``MailMessage`` and handled in ``date``
    order. For each message, a contact is created for every recipient
    alias for which ``ContactLookup.get`` returns a falsy value, then the
    message is passed to ``DeliveryAgent.process`` with an empty sender and
    *email* as the only recipient.

    Args:
        email: Passed to ``User.get``; also the sole delivery recipient.
        import_path: Maildir, directory of message files, or mbox file.
        format: ``'maildir'`` selects maildir parsing; any other value
            selects the directory/mbox handling described above.
        **kwargs: Accepted and ignored.
    """
    from caliopen.core.user import User
    from caliopen.core.contact import Contact, ContactLookup
    from caliopen.core.mail import MailMessage
    from caliopen.smtp.agent import DeliveryAgent

    AVATAR_DIR = '../../../caliopen.ng/src/assets/images/avatars'

    if format == 'maildir':
        emails = Maildir(import_path, factory=message_from_file)
        mode = 'maildir'
    elif os.path.isdir(import_path):
        mode = 'mbox_directory'
        emails = {}
        for f in listdir(import_path):
            path = os.path.join(import_path, f)
            if not os.path.isfile(path):
                continue
            with open(path) as fh:
                emails[f] = message_from_file(fh)
    else:
        mode = 'mbox'
        emails = mbox(import_path)

    user = User.get(email)
    agent = DeliveryAgent()
    mailfrom = ''
    rcpts = [email]
    log.info("Processing mode %s" % mode)

    msgs = []
    for key, mail in emails.iteritems():
        log.info('Processing mail %s' % key)
        msgs.append(MailMessage(mail))
    msgs.sort(key=lambda msg: msg.date)

    for msg in msgs:
        # Create the missing contacts before delivering the message.
        for _kind, addresses in msg.recipients.iteritems():
            if not addresses:
                continue
            for alias, _address in addresses:
                if ContactLookup.get(user, alias):
                    continue
                log.info('Creating contact %s' % alias)
                infos = {'mail': alias}
                # rsplit never fails to unpack, whatever number of '@'
                # the alias contains.
                name = alias.rsplit('@', 1)[0]
                if os.path.isfile('%s/%s.png' % (AVATAR_DIR, name)):
                    infos['avatar'] = '%s.png' % name
                Contact.create(user, infos)
        res = agent.process(mailfrom, rcpts, msg.mail.as_string())
        log.info('Process result %r' % res)
def import_email(email, import_path, format, **kwargs):
    """Import mails from disk and deliver them to the user *email*.

    The source is chosen from *format* and *import_path*:

    * ``format == 'maildir'``: *import_path* is opened as a maildir;
    * otherwise, when *import_path* is a directory, every regular file in
      it is parsed as one message;
    * otherwise *import_path* is opened as a single mbox file.

    All messages are wrapped in ``MailMessage`` and handled in ``date``
    order. For each message, a contact is created for every recipient
    alias for which ``ContactLookup.get`` returns a falsy value, then the
    message is passed to ``DeliveryAgent.process`` with an empty sender and
    *email* as the only recipient.

    Args:
        email: Passed to ``User.get``; also the sole delivery recipient.
        import_path: Maildir, directory of message files, or mbox file.
        format: ``'maildir'`` selects maildir parsing; any other value
            selects the directory/mbox handling described above.
        **kwargs: Accepted and ignored.
    """
    from caliop.helpers.log import log
    from caliop.core.user import User
    from caliop.core.contact import Contact, ContactLookup
    from caliop.core.mail import MailMessage
    from caliop.smtp.agent import DeliveryAgent

    AVATAR_DIR = '../../caliop.ng/src/assets/images/avatars'

    if format == 'maildir':
        emails = Maildir(import_path, factory=message_from_file)
        mode = 'maildir'
    elif os.path.isdir(import_path):
        mode = 'mbox_directory'
        emails = {}
        for f in listdir(import_path):
            path = os.path.join(import_path, f)
            if not os.path.isfile(path):
                continue
            with open(path) as fh:
                emails[f] = message_from_file(fh)
    else:
        mode = 'mbox'
        emails = mbox(import_path)

    user = User.get(email)
    agent = DeliveryAgent()
    mailfrom = ''
    rcpts = [email]
    log.info("Processing mode %s" % mode)

    msgs = []
    for key, mail in emails.iteritems():
        log.info('Processing mail %s' % key)
        msgs.append(MailMessage(mail))
    msgs.sort(key=lambda msg: msg.date)

    for msg in msgs:
        # Create the missing contacts before delivering the message.
        for _kind, addresses in msg.recipients.iteritems():
            if not addresses:
                continue
            for alias, _address in addresses:
                if ContactLookup.get(user, alias):
                    continue
                log.info('Creating contact %s' % alias)
                infos = {'mail': alias}
                # rsplit never fails to unpack, whatever number of '@'
                # the alias contains.
                name = alias.rsplit('@', 1)[0]
                if os.path.isfile('%s/%s.png' % (AVATAR_DIR, name)):
                    infos['avatar'] = '%s.png' % name
                Contact.create(user, infos)
        res = agent.process(mailfrom, rcpts, msg.mail.as_string())
        log.info('Process result %r' % res)
def add_maildir(self, maildir_path):
    """Register every mail of an existing maildir under its computed hash.

    The maildir at *maildir_path* is opened without being created. For
    each message, ``self.compute_hash`` is called with the path of the
    mail file, the message and ``self.use_message_id``. Mails raising
    ``InsufficientHeadersError`` are reported on stdout and skipped; the
    others are appended, as ``(mail_file, message)`` tuples, to the list
    stored in ``self.mails`` under their hash, and ``self.mail_count`` is
    incremented.
    """
    box = Maildir(maildir_path, create=False)
    print("Processing {} mails in {}".format(len(box), box._path))

    for key, msg in box.iteritems():
        path = os.path.join(box._path, box._lookup(key))
        try:
            digest, _header_text = self.compute_hash(
                path, msg, self.use_message_id)
        except InsufficientHeadersError as err:
            print("WARNING: ignoring problematic {}: {}".format(
                path, err.args[0]))
            continue

        # Crude progress marker: one dot each time the running count is a
        # non-zero multiple of 100.
        if self.mail_count > 0 and self.mail_count % 100 == 0:
            print(".")

        # Collate mails by hash.
        self.mails.setdefault(digest, []).append((path, msg))
        self.mail_count += 1
def add_maildir(self, maildir_path):
    """Open a maildir and group the paths of its mails by hash.

    *maildir_path* is normalised with ``self.canonical_path`` and opened
    without being created. Every mail found increments
    ``self.stats['mail_found']``. A ``Mail`` object is built for its
    canonical path and its ``hash_key`` is read:

    * on ``InsufficientHeadersError`` a warning is logged and
      ``self.stats['mail_rejected']`` is incremented;
    * otherwise the path is added to the set stored in ``self.mails``
      under that hash and ``self.stats['mail_kept']`` is incremented.

    When ``self.progress`` is true the iteration is wrapped in a
    ``ProgressBar``.
    """
    maildir_path = self.canonical_path(maildir_path)
    logger.info("Opening maildir at {} ...".format(maildir_path))
    # The Maildir parser wants a plain string path, not a unicode one.
    box = Maildir(str(maildir_path), factory=None, create=False)

    logger.info("{} mails found.".format(len(box)))

    if not self.progress:
        def track(iterable):
            return iterable
    else:
        track = ProgressBar(
            widgets=[Percentage(), Bar()],
            max_value=len(box),
            redirect_stderr=True,
            redirect_stdout=True)

    for key, _message in track(box.iteritems()):
        self.stats['mail_found'] += 1

        location = self.canonical_path(
            os.path.join(box._path, box._lookup(key)))
        entry = Mail(location, self.time_source, self.use_message_id)

        try:
            digest = entry.hash_key
        except InsufficientHeadersError as err:
            logger.warning("Rejecting {}: {}".format(
                location, err.args[0]))
            self.stats['mail_rejected'] += 1
            continue

        logger.debug("Hash is {} for mail {!r}.".format(
            digest, key))
        # A set collapses several entries that point to the same file.
        self.mails.setdefault(digest, set()).add(location)
        self.stats['mail_kept'] += 1
def import_email(email, import_path, format, contact_probability, **kwargs):
    """Import emails for an user.

    The source is chosen from *format* and *import_path*:

    * ``format == 'maildir'``: *import_path* is opened as a maildir;
    * otherwise, when *import_path* is a directory, every regular file in
      it is parsed as one message (files that fail to parse are logged and
      skipped);
    * otherwise *import_path* is opened as a single mbox file.

    Each message whose serialised form is not larger than the configured
    ``object_store.db_size_limit`` is stored with ``RawMessage.create`` and
    processed with ``UserMessageDelivery.process_raw``. Larger messages are
    skipped with a warning.

    Args:
        email: Local identity passed to ``User.by_local_identity``.
        import_path: Maildir, directory of message files, or mbox file.
        format: ``'maildir'`` selects maildir parsing; any other value
            selects the directory/mbox handling described above.
        contact_probability: For each message a ``random()`` draw is
            compared to this value; when the draw is lower or equal,
            contacts are created for the participants that
            ``ContactLookup`` does not find.
        **kwargs: Accepted and ignored.
    """
    from caliopen_main.user.core import User
    from caliopen_main.contact.core import Contact, ContactLookup
    from caliopen_main.message.parsers.mail import MailMessage
    from caliopen_main.contact.parameters import NewContact, NewEmail
    from caliopen_nats.delivery import UserMessageDelivery
    from caliopen_main.message.core import RawMessage
    from caliopen_storage.config import Configuration

    max_size = int(Configuration("global").get("object_store.db_size_limit"))

    if format == 'maildir':
        emails = Maildir(import_path, factory=message_from_file)
        mode = 'maildir'
    elif os.path.isdir(import_path):
        mode = 'mbox_directory'
        emails = {}
        files = [f for f in os.listdir(import_path)
                 if os.path.isfile(os.path.join(import_path, f))]
        for f in files:
            try:
                log.debug('Importing mail from file {}'.format(f))
                with open(os.path.join(import_path, f)) as fh:
                    emails[f] = message_from_file(fh)
            except Exception as exc:
                log.error('Error importing email {}'.format(exc))
    else:
        mode = 'mbox'
        emails = mbox(import_path)

    user = User.by_local_identity(email)

    log.info("Processing mode %s" % mode)

    for _key, data in emails.iteritems():
        # Serialise once; the text is reused for the size check, the raw
        # storage and the parsing.
        raw_text = data.as_string()

        # Prevent creating message too large to fit in db.
        # (should use inject cmd for large messages)
        if len(raw_text) > max_size:
            log.warn("Message too large to fit into db. "
                     "Please, use 'inject' cmd for importing large emails.")
            continue

        raw = RawMessage.create(raw_text)
        log.debug('Created raw message {}'.format(raw.raw_msg_id))
        message = MailMessage(raw_text)

        if random() <= contact_probability:
            for participant in message.participants:
                address = participant.address
                try:
                    ContactLookup.get(user, address)
                except NotFound:
                    log.info('Creating contact %s' % address)
                    contact_param = NewContact()
                    # rsplit never fails to unpack, whatever number of
                    # '@' the address contains.
                    contact_param.family_name = address.rsplit('@', 1)[0]
                    if address:
                        e_mail = NewEmail()
                        e_mail.address = address
                        contact_param.emails = [e_mail]
                    Contact.create(user, contact_param)
        else:
            log.info('No contact associated to raw {} '.format(raw.raw_msg_id))

        processor = UserMessageDelivery(user)
        obj_message = processor.process_raw(raw.raw_msg_id)
        log.info('Created message {}'.format(obj_message.message_id))
def import_email(email, import_path, format, **kwargs):
    """Import mails from disk and deliver them to the user *email*.

    The source is chosen from *format* and *import_path*:

    * ``format == "maildir"``: *import_path* is opened as a maildir;
    * otherwise, when *import_path* is a directory, every regular file in
      it is parsed as one message;
    * otherwise *import_path* is opened as a single mbox file.

    All messages are wrapped in ``MailMessage`` and handled in ``date``
    order. For each message, a contact is created for every recipient
    alias for which ``ContactLookup.get`` raises ``NotFound``, then the
    message is passed to ``DeliveryAgent.process`` with an empty sender and
    *email* as the only recipient.

    Args:
        email: Passed to ``User.by_name``; also the sole delivery
            recipient.
        import_path: Maildir, directory of message files, or mbox file.
        format: ``"maildir"`` selects maildir parsing; any other value
            selects the directory/mbox handling described above.
        **kwargs: Accepted and ignored.
    """
    from caliopen.base.user.core import User
    from caliopen.base.user.core import Contact, ContactLookup
    from caliopen.base.message.format.mail import MailMessage
    from caliopen.base.user.parameters import NewContact, NewEmail
    from caliopen.smtp.agent import DeliveryAgent

    if format == "maildir":
        emails = Maildir(import_path, factory=message_from_file)
        mode = "maildir"
    elif os.path.isdir(import_path):
        mode = "mbox_directory"
        emails = {}
        for f in listdir(import_path):
            path = os.path.join(import_path, f)
            if not os.path.isfile(path):
                continue
            with open(path) as fh:
                emails[f] = message_from_file(fh)
    else:
        mode = "mbox"
        emails = mbox(import_path)

    user = User.by_name(email)
    agent = DeliveryAgent()
    mailfrom = ""
    rcpts = [email]
    log.info("Processing mode %s" % mode)

    msgs = []
    for key, mail in emails.iteritems():
        log.info("Processing mail %s" % key)
        msgs.append(MailMessage(mail))
    msgs.sort(key=lambda msg: msg.date)

    for msg in msgs:
        # Create the missing contacts before delivering the message.
        for _kind, addresses in msg.recipients.iteritems():
            if not addresses:
                continue
            for alias, _address in addresses:
                try:
                    ContactLookup.get(user, alias)
                except NotFound:
                    log.info("Creating contact %s" % alias)
                    contact = NewContact()
                    # rsplit never fails to unpack, whatever number of
                    # "@" the alias contains.
                    contact.family_name = alias.rsplit("@", 1)[0]
                    # Use a distinct name so the `email` parameter is not
                    # rebound.
                    contact_email = NewEmail()
                    contact_email.address = alias
                    # NOTE(review): no avatar is attached to the created
                    # contact - confirm whether NewContact should carry one.
                    Contact.create(user, contact, emails=[contact_email])
        res = agent.process(mailfrom, rcpts, msg.mail.as_string())
        log.info("Process result %r" % res)
def import_email(email, import_path, format, contact_probability, **kwargs):
    """Import emails for an user.

    The source is chosen from *format* and *import_path*:

    * ``format == 'maildir'``: *import_path* is opened as a maildir;
    * otherwise, when *import_path* is a directory, every regular file in
      it is read, its lines starting with ``To: `` are rewritten to the
      destination address, and the result is parsed as one message (files
      that fail are logged and skipped);
    * otherwise *import_path* is opened as a single mbox file.

    Each message whose serialised form is not larger than the configured
    ``object_store.db_size_limit`` is stored with ``RawMessage.create`` and
    processed with ``UserMailDelivery.process_raw``. Larger messages are
    skipped with a warning, duplicates are logged and skipped, and any
    other processing error is logged without stopping the import.

    Args:
        email: Default destination address.
        import_path: Maildir, directory of message files, or mbox file.
        format: ``'maildir'`` selects maildir parsing; any other value
            selects the directory/mbox handling described above.
        contact_probability: For each message a ``random()`` draw is
            compared to this value; when the draw is lower or equal,
            contacts are created for the participants that
            ``ContactLookup`` does not find.
        **kwargs: A truthy ``to`` entry overrides the destination address.

    Raises:
        Exception: If ``to`` differs from *email* with the maildir format.
    """
    from caliopen_main.user.core import User
    from caliopen_main.contact.core import Contact, ContactLookup
    from caliopen_main.message.parsers.mail import MailMessage
    from caliopen_main.contact.parameters import NewContact, NewEmail
    from caliopen_nats.delivery import UserMailDelivery
    from caliopen_main.message.core import RawMessage
    from caliopen_storage.config import Configuration

    max_size = int(Configuration("global").get("object_store.db_size_limit"))

    dest_email = kwargs.get('to') or email

    if format == 'maildir':
        if dest_email != email:
            raise Exception('Cannot change To email using maildir format')
        emails = Maildir(import_path, factory=message_from_file)
        mode = 'maildir'
    elif os.path.isdir(import_path):
        mode = 'mbox_directory'
        emails = {}
        to_line = 'To: %s' % dest_email
        files = [f for f in os.listdir(import_path)
                 if os.path.isfile(os.path.join(import_path, f))]
        for f in files:
            try:
                log.debug('Importing mail from file {}'.format(f))
                with open(os.path.join(import_path, f)) as fh:
                    data = fh.read()
                # A callable replacement inserts the address verbatim; a
                # plain string would be read as a template (backslashes,
                # group references).
                # NOTE(review): the pattern matches any line starting
                # with "To: ", body lines included.
                data = re.sub(r'^To: (.*)', lambda _match: to_line,
                              data, flags=re.MULTILINE)
                emails[f] = message_from_string(data)
            except Exception as exc:
                log.error('Error importing email {}'.format(exc))
    else:
        mode = 'mbox'
        emails = mbox(import_path)

    user = User.by_local_identifier(dest_email, 'email')

    log.info("Processing mode %s" % mode)

    for _key, data in emails.iteritems():
        # Serialise once; the text is reused for the size check, the raw
        # storage and the parsing.
        raw_text = data.as_string()

        # Prevent creating message too large to fit in db.
        # (should use inject cmd for large messages)
        if len(raw_text) > max_size:
            log.warn("Message too large to fit into db. "
                     "Please, use 'inject' cmd for importing large emails.")
            continue

        raw = RawMessage.create(raw_text)
        log.debug('Created raw message {}'.format(raw.raw_msg_id))
        message = MailMessage(raw_text)

        if random() <= contact_probability:
            for participant in message.participants:
                address = participant.address
                try:
                    ContactLookup.get(user, address)
                except NotFound:
                    log.info('Creating contact %s' % address)
                    contact_param = NewContact()
                    # rsplit never fails to unpack, whatever number of
                    # '@' the address contains.
                    contact_param.family_name = address.rsplit('@', 1)[0]
                    if address:
                        e_mail = NewEmail()
                        e_mail.address = address
                        contact_param.emails = [e_mail]
                    Contact.create(user, contact_param)
        else:
            log.info('No contact associated to raw {} '.format(raw.raw_msg_id))

        processor = UserMailDelivery(
            user,
            user.local_identities[0])  # assume one local identity
        try:
            obj_message = processor.process_raw(raw.raw_msg_id)
        except DuplicateMessage:
            log.info('duplicate message {}, not imported'.format(
                raw.raw_msg_id))
        except Exception as exc:
            log.exception(exc)
        else:
            log.info('Created message {}'.format(obj_message.message_id))
def import_email(email, import_path, format, contact_probability, **kwargs):
    """Import emails for an user.

    The source is chosen from *format* and *import_path*:

    * ``format == 'maildir'``: *import_path* is opened as a maildir;
    * otherwise, when *import_path* is a directory, every regular file in
      it is read, its lines starting with ``To: `` are rewritten to the
      destination address, and the result is parsed as one message (files
      that fail are logged and skipped);
    * otherwise *import_path* is opened as a single mbox file.

    Each message whose serialised form is not larger than the configured
    ``object_store.db_size_limit`` is stored with ``RawMessage.create`` and
    processed with ``UserMailDelivery.process_raw``. Larger messages are
    skipped with a warning, and processing errors are logged without
    stopping the import.

    Args:
        email: Default destination address.
        import_path: Maildir, directory of message files, or mbox file.
        format: ``'maildir'`` selects maildir parsing; any other value
            selects the directory/mbox handling described above.
        contact_probability: For each message a ``random()`` draw is
            compared to this value; when the draw is lower or equal,
            contacts are created for the participants that
            ``ContactLookup`` does not find.
        **kwargs: A truthy ``to`` entry overrides the destination address.

    Raises:
        Exception: If ``to`` differs from *email* with the maildir format.
    """
    from caliopen_main.user.core import User, UserIdentity
    from caliopen_main.contact.core import Contact, ContactLookup
    from caliopen_main.message.parsers.mail import MailMessage
    from caliopen_main.contact.parameters import NewContact, NewEmail
    from caliopen_nats.delivery import UserMailDelivery
    from caliopen_main.message.core import RawMessage
    from caliopen_storage.config import Configuration

    max_size = int(Configuration("global").get("object_store.db_size_limit"))

    dest_email = kwargs.get('to') or email

    if format == 'maildir':
        if dest_email != email:
            raise Exception('Cannot change To email using maildir format')
        emails = Maildir(import_path, factory=message_from_file)
        mode = 'maildir'
    elif os.path.isdir(import_path):
        mode = 'mbox_directory'
        emails = {}
        to_line = 'To: %s' % dest_email
        files = [f for f in os.listdir(import_path)
                 if os.path.isfile(os.path.join(import_path, f))]
        for f in files:
            try:
                log.debug('Importing mail from file {}'.format(f))
                with open(os.path.join(import_path, f)) as fh:
                    data = fh.read()
                # A callable replacement inserts the address verbatim; a
                # plain string would be read as a template (backslashes,
                # group references).
                # NOTE(review): the pattern matches any line starting
                # with "To: ", body lines included.
                data = re.sub(r'^To: (.*)', lambda _match: to_line,
                              data, flags=re.MULTILINE)
                emails[f] = message_from_string(data)
            except Exception as exc:
                log.error('Error importing email {}'.format(exc))
    else:
        mode = 'mbox'
        emails = mbox(import_path)

    user = User.by_local_identifier(dest_email, 'email')

    log.info("Processing mode %s" % mode)

    for _key, data in emails.iteritems():
        # Serialise once; the text is reused for the size check, the raw
        # storage and the parsing.
        raw_text = data.as_string()

        # Prevent creating message too large to fit in db.
        # (should use inject cmd for large messages)
        if len(raw_text) > max_size:
            log.warn("Message too large to fit into db. "
                     "Please, use 'inject' cmd for importing large emails.")
            continue

        raw = RawMessage.create(raw_text)
        log.debug('Created raw message {}'.format(raw.raw_msg_id))
        message = MailMessage(raw_text)

        if random() <= contact_probability:
            for participant in message.participants:
                address = participant.address
                try:
                    ContactLookup.get(user, address)
                except NotFound:
                    log.info('Creating contact %s' % address)
                    contact_param = NewContact()
                    # rsplit never fails to unpack, whatever number of
                    # '@' the address contains.
                    contact_param.family_name = address.rsplit('@', 1)[0]
                    if address:
                        e_mail = NewEmail()
                        e_mail.address = address
                        contact_param.emails = [e_mail]
                    Contact.create(user, contact_param)
        else:
            log.info('No contact associated to raw {} '.format(raw.raw_msg_id))

        processor = UserMailDelivery(
            user,
            user.local_identities[0])  # assume one local identity
        try:
            obj_message = processor.process_raw(raw.raw_msg_id)
        except Exception as exc:
            log.exception(exc)
        else:
            log.info('Created message {}'.format(obj_message.message_id))