def download_attachment():
    """Save attachments of unread 'smart_list' mails as ``downloaded_list.txt``.

    Connects to ``imap.gmail.com``, selects unread messages whose subject is
    ``smart_list`` and that carry an attachment, and writes each attachment
    into the directory containing this source file. Every attachment is
    written to the same path, ``downloaded_list.txt``, regardless of its
    original file name, so a later attachment overwrites an earlier one.

    A failure while saving one attachment is reported with a traceback and
    does not stop processing. Each selected message is marked as read after
    its attachments were handled, even if saving failed. The connection is
    always logged out, also when an unexpected error aborts the loop.
    """
    download_folder = os.path.dirname(os.path.realpath(__file__))
    # One fixed target name keeps the folder from filling up with
    # differently named copies of what is logically the same list.
    download_path = os.path.join(download_folder, "downloaded_list.txt")

    # NOTE(review): credentials are hard-coded placeholders in this file.
    mail = imbox.Imbox('imap.gmail.com',
                       username='******',
                       password='******',
                       ssl=True,
                       ssl_context=None,
                       starttls=False)
    try:
        messages = mail.messages(unread=True,
                                 subject="smart_list",
                                 raw='has:attachment')
        for uid, message in messages:
            for attachment in message.attachments:
                try:
                    # Read first so a broken attachment does not truncate
                    # a previously downloaded file.
                    content = attachment.get('content').read()
                    with open(download_path, "wb") as target:
                        target.write(content)
                except Exception:
                    # Best effort: report the problem and keep going.
                    traceback.print_exc()
            mail.mark_seen(uid)
    finally:
        mail.logout()
def connect_mailbox(self):
    """Open a mailbox connection from the bot parameters and return it.

    The target host is logged at debug level first. Host, user, password
    and SSL flag are read from ``self.parameters`` and passed positionally
    to ``imbox.Imbox``.
    """
    params = self.parameters
    self.logger.debug("Connecting to %s.", params.mail_host)
    return imbox.Imbox(params.mail_host,
                       params.mail_user,
                       params.mail_password,
                       params.mail_ssl)
def process(self):
    """Collect matching attachments of unread mails and send them as reports.

    Connects to the configured mailbox and fetches unread messages from
    ``parameters.folder``, optionally filtered by the ``sent_to`` and
    ``sent_from`` parameters. A message is skipped when ``subject_regex``
    is set and does not match its subject (folded header line breaks are
    replaced by a space before matching). For every attachment whose file
    name matches ``attach_regex`` the content is read (the first member of
    the archive when ``attach_unzip`` is set), wrapped in a new report under
    the ``raw`` key and sent. The message is then marked as seen.

    The mailbox is logged out even when processing raises, so a failing
    message does not leak the connection.
    """
    params = self.parameters
    self.logger.debug("Connecting to %s.", params.mail_host)
    mailbox = imbox.Imbox(params.mail_host,
                          params.mail_user,
                          params.mail_password,
                          params.mail_ssl)
    try:
        emails = mailbox.messages(folder=params.folder,
                                  unread=True,
                                  sent_to=getattr(params, "sent_to", None),
                                  sent_from=getattr(params, "sent_from", None))

        if emails:
            for uid, message in emails:
                if (params.subject_regex and
                        not re.search(params.subject_regex,
                                      re.sub(r"\r\n\s", " ", message.subject))):
                    self.logger.debug(
                        "Message with date %s skipped because subject %r does not match.",
                        message.date, message.subject)
                    continue

                for attach in message.attachments:
                    if not attach:
                        continue

                    attach_filename = attach['filename']
                    # imbox versions older than 0.9.5 keep the surrounding
                    # double quotes in the file name.
                    if attach_filename.startswith('"'):
                        attach_filename = attach_filename[1:-1]

                    if not re.search(params.attach_regex, attach_filename):
                        continue

                    self.logger.debug("Found suitable attachment %s.",
                                      attach_filename)

                    if params.attach_unzip:
                        # Only the first archive member is used.
                        with zipfile.ZipFile(attach['content']) as zipped:
                            raw_report = zipped.read(zipped.namelist()[0])
                    else:
                        raw_report = attach['content'].read()

                    report = self.new_report()
                    report.add("raw", raw_report)
                    self.send_message(report)

                    # Only mark read if message relevant to this instance,
                    # so other instances watching this mailbox will still
                    # check it.
                    mailbox.mark_seen(uid)
                self.logger.debug("Email report read.")
        else:
            self.logger.debug("No unread mails to check.")
    finally:
        mailbox.logout()
def connect_mailbox(self):
    """Open a mailbox connection with a default SSL context and return it.

    The SSL context comes from ``ssl.create_default_context``; the optional
    ``ssl_ca_certificate`` parameter is passed as its ``cafile`` (``None``
    when the parameter is absent). The target host is logged at debug level
    before anything else happens.
    """
    params = self.parameters
    self.logger.debug("Connecting to %s.", params.mail_host)

    context = ssl.create_default_context(
        cafile=getattr(params, 'ssl_ca_certificate', None))
    return imbox.Imbox(params.mail_host,
                       params.mail_user,
                       params.mail_password,
                       params.mail_ssl,
                       ssl_context=context)
def connect_mailbox(self):
    """Open a mailbox connection (optional port, default SSL context).

    The SSL context comes from ``ssl.create_default_context`` with the
    optional ``ssl_ca_certificate`` parameter as ``cafile``. The optional
    ``mail_port`` parameter is forwarded as ``port``; when it is absent
    ``None`` is passed and the port choice is left to imbox.
    """
    params = self.parameters
    self.logger.debug("Connecting to %s.", params.mail_host)

    context = ssl.create_default_context(
        cafile=getattr(params, 'ssl_ca_certificate', None))
    connection = imbox.Imbox(
        params.mail_host,
        params.mail_user,
        params.mail_password,
        params.mail_ssl,
        # With port=None imbox falls back to 143/993 depending on the SSL
        # setting.
        port=getattr(params, 'mail_port', None),
        ssl_context=context)
    return connection
def process(self):
    """Download reports from URLs found in unread mails and send them on.

    Fetches unread messages from ``parameters.folder``. A message is skipped
    when ``subject_regex`` is set and does not match its subject (folded
    header line breaks are replaced by a space before matching). Each plain
    text body part is searched with ``url_regex``; a match is stripped of
    surrounding whitespace and downloaded with the HTTP settings stored on
    the instance. The response is split by ``generate_reports`` and every
    resulting report is sent, after which the message is marked as seen.

    Raises:
        ValueError: if the HTTP response status is not 2xx.

    The mailbox is logged out even when an error is raised.
    """
    mailbox = imbox.Imbox(self.parameters.mail_host,
                          self.parameters.mail_user,
                          self.parameters.mail_password,
                          self.parameters.mail_ssl)
    try:
        emails = mailbox.messages(folder=self.parameters.folder, unread=True)

        if emails:
            for uid, message in emails:
                # Raw string: the previous non-raw pattern relied on the
                # invalid escape sequence "\s".
                if (self.parameters.subject_regex and
                        not re.search(self.parameters.subject_regex,
                                      re.sub(r"\r\n\s", " ", message.subject))):
                    continue

                for body in message.body['plain']:
                    match = re.search(self.parameters.url_regex, str(body))
                    if not match:
                        continue

                    # strip leading and trailing spaces, newlines and
                    # carriage returns
                    url = match.group().strip()

                    self.logger.info("Downloading report from %r.", url)
                    resp = requests.get(url=url,
                                        auth=self.auth,
                                        proxies=self.proxy,
                                        headers=self.http_header,
                                        verify=self.http_verify_cert,
                                        cert=self.ssl_client_cert,
                                        timeout=self.http_timeout)

                    if resp.status_code // 100 != 2:
                        raise ValueError('HTTP response status code was {}.'
                                         ''.format(resp.status_code))

                    self.logger.info("Report downloaded.")

                    template = self.new_report()
                    for report in generate_reports(template,
                                                   io.BytesIO(resp.content),
                                                   self.chunk_size,
                                                   self.chunk_replicate_header):
                        self.send_message(report)

                    # Only mark read if message relevant to this instance,
                    # so other instances watching this mailbox will still
                    # check it.
                    mailbox.mark_seen(uid)
                self.logger.info("Email report read.")
    finally:
        mailbox.logout()
def process(self):
    """Collect matching attachments of unread mails and send them as reports.

    Fetches unread messages from ``parameters.mail_folder`` and skips those
    whose subject does not match ``mail_subject_regex`` (when set). For each
    attachment whose file name matches ``mail_attach_regex`` the content is
    read (the first archive member when ``mail_attach_unzip`` is set) and
    sent as a ``Report`` carrying ``raw``, ``feed.name``, ``feed.accuracy``
    and ``time.observation``. Every message that passed the subject filter
    is marked as seen afterwards.

    The mailbox is logged out when processing ends, also on error.
    """
    mailbox = imbox.Imbox(self.parameters.mail_host,
                          self.parameters.mail_user,
                          self.parameters.mail_password,
                          self.parameters.mail_ssl)
    try:
        emails = mailbox.messages(folder=self.parameters.mail_folder,
                                  unread=True)

        if emails:
            for uid, message in emails:
                if (self.parameters.mail_subject_regex and
                        not re.search(self.parameters.mail_subject_regex,
                                      message.subject)):
                    continue

                self.logger.info("Reading email report")

                for attach in message.attachments:
                    if not attach:
                        continue

                    # Remove surrounding quote marks only when they are
                    # present; stripping unconditionally would cut the first
                    # and last character off unquoted file names.
                    attach_name = attach['filename']
                    if attach_name.startswith('"'):
                        attach_name = attach_name[1:-1]

                    if not re.search(self.parameters.mail_attach_regex,
                                     attach_name):
                        continue

                    if self.parameters.mail_attach_unzip:
                        # Only the first archive member is used.
                        with zipfile.ZipFile(attach['content']) as zipped:
                            raw_report = zipped.read(zipped.namelist()[0])
                    else:
                        raw_report = attach['content'].read()

                    report = Report()
                    report.add("raw", raw_report, sanitize=True)
                    report.add("feed.name", self.parameters.feed,
                               sanitize=True)
                    report.add("feed.accuracy", self.parameters.accuracy,
                               sanitize=True)
                    time_observation = DateTime().generate_datetime_now()
                    report.add('time.observation', time_observation,
                               sanitize=True)

                    self.send_message(report)

                mailbox.mark_seen(uid)
                self.logger.info("Email report read")
    finally:
        mailbox.logout()
def connect_mailbox(self):
    """Open a mailbox connection from the instance settings and return it.

    When ``self.http_verify_cert`` is exactly ``True`` a default SSL context
    is created with ``self.ssl_ca_certificate`` as CA file. For any other
    value an unverified context is used, i.e. certificates are not checked.
    Port and STARTTLS settings are forwarded to ``imbox.Imbox``.
    """
    self.logger.debug("Connecting to %s.", self.mail_host)

    # NOTE: identity check on purpose - only the literal True enables
    # certificate verification.
    if self.http_verify_cert is not True:
        context = ssl._create_unverified_context()
    else:
        context = ssl.create_default_context(cafile=self.ssl_ca_certificate)

    connection = imbox.Imbox(
        self.mail_host,
        self.mail_user,
        self.mail_password,
        self.mail_ssl,
        # imbox picks 143/993 by itself, depending on the SSL setting, when
        # no explicit port is given.
        port=self.mail_port,
        starttls=self.mail_starttls,
        ssl_context=context)
    return connection
def process(self):
    """Collect matching attachments of unread mails and send them as reports.

    Fetches unread messages from ``parameters.folder`` and skips those whose
    subject does not match ``subject_regex`` (when set). For each attachment
    whose file name matches ``attach_regex`` the content is read (the first
    archive member when ``attach_unzip`` is set) and sent as a ``Report``
    carrying ``raw``, ``feed.name`` and ``feed.accuracy``. The message is
    then marked as seen.

    The mailbox is logged out even when processing raises.
    """
    mailbox = imbox.Imbox(self.parameters.mail_host,
                          self.parameters.mail_user,
                          self.parameters.mail_password,
                          self.parameters.mail_ssl)
    try:
        emails = mailbox.messages(folder=self.parameters.folder, unread=True)

        if emails:
            for uid, message in emails:
                if (self.parameters.subject_regex and
                        not re.search(self.parameters.subject_regex,
                                      message.subject)):
                    continue

                self.logger.info("Reading email report")

                for attach in message.attachments:
                    if not attach:
                        continue

                    # Remove surrounding quote marks only when they are
                    # present; stripping unconditionally would cut the first
                    # and last character off unquoted file names.
                    attach_name = attach['filename']
                    if attach_name.startswith('"'):
                        attach_name = attach_name[1:-1]

                    if not re.search(self.parameters.attach_regex,
                                     attach_name):
                        continue

                    if self.parameters.attach_unzip:
                        # Only the first archive member is used.
                        with zipfile.ZipFile(attach['content']) as zipped:
                            raw_report = zipped.read(zipped.namelist()[0])
                    else:
                        raw_report = attach['content'].read()

                    report = Report()
                    report.add("raw", raw_report)
                    report.add("feed.name", self.parameters.feed)
                    report.add("feed.accuracy", self.parameters.accuracy)
                    self.send_message(report)

                    # Only mark read if message relevant to this instance,
                    # so other instances watching this mailbox will still
                    # check it.
                    mailbox.mark_seen(uid)
                self.logger.info("Email report read")
    finally:
        mailbox.logout()
def process(self):
    """Collect matching attachments of unread mails and send them as reports.

    Fetches unread messages from ``parameters.folder``, optionally filtered
    by the ``sent_to`` and ``sent_from`` parameters. A message is skipped
    when ``subject_regex`` is set and does not match its subject (folded
    header line breaks are replaced by a space before matching). For each
    attachment whose file name matches ``attach_regex`` the content is read
    (the first archive member when ``attach_unzip`` is set), stored under
    ``raw`` in a new report and sent. The message is then marked as seen.

    The mailbox is logged out even when processing raises.
    """
    params = self.parameters
    mailbox = imbox.Imbox(params.mail_host,
                          params.mail_user,
                          params.mail_password,
                          params.mail_ssl)
    try:
        emails = mailbox.messages(folder=params.folder,
                                  unread=True,
                                  sent_to=getattr(params, "sent_to", None),
                                  sent_from=getattr(params, "sent_from", None))

        if emails:
            for uid, message in emails:
                # Raw string: the previous non-raw pattern relied on the
                # invalid escape sequence "\s".
                if (params.subject_regex and
                        not re.search(params.subject_regex,
                                      re.sub(r"\r\n\s", " ", message.subject))):
                    continue

                for attach in message.attachments:
                    if not attach:
                        continue

                    # Remove surrounding quote marks only when they are
                    # present; stripping unconditionally would cut the first
                    # and last character off unquoted file names.
                    attach_name = attach['filename']
                    if attach_name.startswith('"'):
                        attach_name = attach_name[1:-1]

                    if not re.search(params.attach_regex, attach_name):
                        continue

                    if params.attach_unzip:
                        # Only the first archive member is used.
                        with zipfile.ZipFile(attach['content']) as zipped:
                            raw_report = zipped.read(zipped.namelist()[0])
                    else:
                        raw_report = attach['content'].read()

                    report = self.new_report()
                    report.add("raw", raw_report)
                    self.send_message(report)

                    # Only mark read if message relevant to this instance,
                    # so other instances watching this mailbox will still
                    # check it.
                    mailbox.mark_seen(uid)
                self.logger.debug("Email report read.")
    finally:
        mailbox.logout()
def process(self):
    """Collect matching attachments of unread mails and send them as reports.

    Fetches unread messages from ``parameters.folder``. A message is skipped
    when ``subject_regex`` is set and does not match its subject (folded
    header line breaks are replaced by a space before matching). For each
    attachment whose file name matches ``attach_regex`` the content is read
    (the first archive member when ``attach_unzip`` is set), stored under
    ``raw`` in a new report and sent. The message is then marked as seen.

    The mailbox is logged out even when processing raises.
    """
    mailbox = imbox.Imbox(self.parameters.mail_host,
                          self.parameters.mail_user,
                          self.parameters.mail_password,
                          self.parameters.mail_ssl)
    try:
        emails = mailbox.messages(folder=self.parameters.folder, unread=True)

        if emails:
            for uid, message in emails:
                # Raw string: the previous non-raw pattern relied on the
                # invalid escape sequence "\s".
                if (self.parameters.subject_regex and
                        not re.search(self.parameters.subject_regex,
                                      re.sub(r"\r\n\s", " ", message.subject))):
                    continue

                for attach in message.attachments:
                    if not attach:
                        continue

                    attach_filename = attach['filename']
                    # imbox versions older than 0.9.5 keep the surrounding
                    # double quotes in the file name.
                    if attach_filename.startswith('"'):
                        attach_filename = attach_filename[1:-1]

                    if not re.search(self.parameters.attach_regex,
                                     attach_filename):
                        continue

                    if self.parameters.attach_unzip:
                        # Only the first archive member is used.
                        with zipfile.ZipFile(attach['content']) as zipped:
                            raw_report = zipped.read(zipped.namelist()[0])
                    else:
                        raw_report = attach['content'].read()

                    report = self.new_report()
                    report.add("raw", raw_report)
                    self.send_message(report)

                    # Only mark read if message relevant to this instance,
                    # so other instances watching this mailbox will still
                    # check it.
                    mailbox.mark_seen(uid)
                self.logger.debug("Email report read.")
    finally:
        mailbox.logout()
def process(self):
    """Download reports from URLs found in unread mails and send them on.

    Fetches unread messages from ``parameters.mail_folder`` and skips those
    whose subject does not match ``mail_subject_regex`` (when set). Each
    plain text body part is searched with ``mail_url_regex``; a matching URL
    is downloaded and its content sent as a ``Report`` carrying ``raw``,
    ``feed.name`` and ``feed.accuracy``. Every message that passed the
    subject filter is marked as seen afterwards.

    Raises:
        ValueError: if the HTTP response status is not 2xx.

    The mailbox is logged out when processing ends, also on error.
    """
    mailbox = imbox.Imbox(self.parameters.mail_host,
                          self.parameters.mail_user,
                          self.parameters.mail_password,
                          self.parameters.mail_ssl)
    try:
        emails = mailbox.messages(folder=self.parameters.mail_folder,
                                  unread=True)

        if emails:
            for uid, message in emails:
                if (self.parameters.mail_subject_regex and
                        not re.search(self.parameters.mail_subject_regex,
                                      message.subject)):
                    continue

                self.logger.info("Reading email report")

                for body in message.body['plain']:
                    match = re.search(self.parameters.mail_url_regex, body)
                    if not match:
                        continue

                    url = match.group()

                    self.logger.info("Downloading report from %s", url)
                    # NOTE(review): no timeout is passed, so a stalled server
                    # can block this call indefinitely.
                    resp = requests.get(url=url)

                    if resp.status_code // 100 != 2:
                        raise ValueError('HTTP response status code was {}.'
                                         ''.format(resp.status_code))

                    self.logger.info("Report downloaded.")

                    report = Report()
                    report.add("raw", resp.content)
                    report.add("feed.name", self.parameters.feed)
                    report.add("feed.accuracy", self.parameters.accuracy)
                    self.send_message(report)

                mailbox.mark_seen(uid)
                self.logger.info("Email report read")
    finally:
        mailbox.logout()
def process(self):
    """Download reports from URLs found in unread mails and send them on.

    Fetches unread messages from ``parameters.mail_folder`` and skips those
    whose subject does not match ``mail_subject_regex`` (when set). Each
    plain text body part is searched with ``mail_url_regex``; a matching URL
    is fetched through ``fetch_url`` and the result sent as a ``Report``
    carrying ``raw``, ``feed.name``, ``feed.accuracy`` and
    ``time.observation``. Every message that passed the subject filter is
    marked as seen afterwards.

    The mailbox is logged out when processing ends, also on error.
    """
    mailbox = imbox.Imbox(self.parameters.mail_host,
                          self.parameters.mail_user,
                          self.parameters.mail_password,
                          self.parameters.mail_ssl)
    try:
        emails = mailbox.messages(folder=self.parameters.mail_folder,
                                  unread=True)

        if emails:
            for uid, message in emails:
                if (self.parameters.mail_subject_regex and
                        not re.search(self.parameters.mail_subject_regex,
                                      message.subject)):
                    continue

                self.logger.info("Reading email report")

                for body in message.body['plain']:
                    match = re.search(self.parameters.mail_url_regex, body)
                    if not match:
                        continue

                    url = match.group()

                    self.logger.info("Downloading report from %s", url)
                    raw_report = fetch_url(url, timeout=60.0,
                                           chunk_size=16384)
                    self.logger.info("Report downloaded.")

                    report = Report()
                    report.add("raw", raw_report, sanitize=True)
                    report.add("feed.name", self.parameters.feed,
                               sanitize=True)
                    report.add("feed.accuracy", self.parameters.accuracy,
                               sanitize=True)
                    time_observation = DateTime().generate_datetime_now()
                    report.add('time.observation', time_observation,
                               sanitize=True)
                    self.send_message(report)

                mailbox.mark_seen(uid)
                self.logger.info("Email report read")
    finally:
        mailbox.logout()
def process(self):
    """Collect reports from attachments or, failing that, from linked URLs.

    Connects with the base64-decoded ``mail_password`` and fetches unread
    messages from ``parameters.folder``. Messages whose subject does not
    match ``subject_regex`` (when set) are skipped.

    * If the message has attachments, every attachment whose file name
      matches ``attach_regex`` is read (the first archive member when
      ``attach_unzip`` is set) and sent as a report.
    * Otherwise each plain text body part is searched with ``url_regex`` and
      a matching URL is downloaded and sent as a report.

    Reports carry ``raw``, ``feed.name``, ``feed.accuracy`` and
    ``feed.reportname`` (the mail subject). A message is marked as seen after
    a report was sent for it.

    Errors raised while handling the messages are logged with a traceback
    and swallowed, so one broken mail does not crash the collector. The
    mailbox is always logged out.
    """
    mailbox = imbox.Imbox(self.parameters.mail_host,
                          self.parameters.mail_user,
                          utils.base64_decode(self.parameters.mail_password),
                          self.parameters.mail_ssl)
    self.logger.info("Connected to mail server")
    try:
        # Fetching is deliberately outside the swallowing handler below: a
        # failure to list the mailbox still propagates to the caller.
        emails = mailbox.messages(folder=self.parameters.folder, unread=True)
        try:
            if emails:
                self.logger.info("Parsing emails in mailbox")
                for uid, message in emails:
                    if (self.parameters.subject_regex and
                            not re.search(self.parameters.subject_regex,
                                          message.subject)):
                        continue

                    self.logger.info("Reading email report")

                    if hasattr(message, 'attachments') and message.attachments:
                        for attach in message.attachments:
                            if not attach:
                                continue

                            # Remove surrounding quote marks only when they
                            # are present; stripping unconditionally would
                            # cut the first and last character off unquoted
                            # file names.
                            attach_name = attach['filename']
                            if attach_name.startswith('"'):
                                attach_name = attach_name[1:-1]

                            if not re.search(self.parameters.attach_regex,
                                             attach_name):
                                continue

                            self.logger.info("Parsing attachment")
                            if self.parameters.attach_unzip:
                                # Only the first archive member is used.
                                with zipfile.ZipFile(attach['content']) as zipped:
                                    raw_report = zipped.read(
                                        zipped.namelist()[0])
                            else:
                                raw_report = attach['content'].read()
                            self.logger.info('content read')

                            report = Report()
                            report.add("raw", raw_report, sanitize=True)
                            report.add("feed.name", self.parameters.feed,
                                       sanitize=True)
                            report.add("feed.accuracy",
                                       self.parameters.accuracy,
                                       sanitize=True)
                            report.add('feed.reportname', message.subject,
                                       sanitize=True)
                            self.logger.info('rocking in a free world')
                            self.send_message(report)

                            self.logger.info('just some administration left')
                            mailbox.mark_seen(uid)
                            self.logger.info("Email report read")
                    else:
                        # If no attachment, read from url
                        self.logger.info(
                            "No attachment found, trying collecting from URL")
                        for body in message.body['plain']:
                            self.logger.info("Parsing message body")
                            match = re.search(self.parameters.url_regex, body)
                            if not match:
                                continue

                            url = match.group()
                            self.logger.info("Downloading report from %s",
                                             url)
                            resp = requests.get(url=url)
                            if resp.status_code // 100 != 2:
                                raise ValueError(
                                    'HTTP response status code was {}.'
                                    ''.format(resp.status_code))
                            raw_report = resp.content
                            self.logger.info("Report downloaded.")

                            report = Report()
                            report.add("raw", raw_report, sanitize=True)
                            report.add("feed.name", self.parameters.feed,
                                       sanitize=True)
                            report.add("feed.accuracy",
                                       self.parameters.accuracy,
                                       sanitize=True)
                            self.logger.info("all is well sir")
                            report.add('feed.reportname', message.subject,
                                       sanitize=True)
                            self.send_message(report)

                            mailbox.mark_seen(uid)
                            self.logger.info("Email report read")
        except Exception:
            # Best effort by design, but keep the traceback so the failure
            # can actually be diagnosed.
            self.logger.exception("ERROR with the collector ---")
    finally:
        mailbox.logout()
def connect_mailbox(self):
    """Return a new ``imbox.Imbox`` connection built from the bot parameters.

    Host, user, password and SSL flag are read from ``self.parameters`` and
    passed positionally.
    """
    params = self.parameters
    return imbox.Imbox(params.mail_host,
                       params.mail_user,
                       params.mail_password,
                       params.mail_ssl)
def reconnect(self):
    """Disconnect, then open a fresh connection in ``self.connection``.

    Errors raised by ``self.disconnect()`` are not caught here; they
    propagate to the caller and no new connection is created in that case.
    """
    self.disconnect()
    fresh_connection = imbox.Imbox(self.host,
                                   self.user,
                                   self.password,
                                   self.ssl)
    self.connection = fresh_connection
def connect(self):
    """Open a mailbox connection and store it in ``self.connection``.

    Host, user, password and SSL flag are taken from the instance and passed
    positionally to ``imbox.Imbox``.
    """
    settings = (self.host, self.user, self.password, self.ssl)
    self.connection = imbox.Imbox(*settings)
def process(self):
    """Download reports from URLs found in unread mails, retrying timeouts.

    Fetches unread messages from ``parameters.folder``. A message is skipped
    when ``subject_regex`` is set and does not match its subject (folded
    header line breaks are replaced by a space before matching). Each plain
    text body part is searched with ``url_regex``; a match is stripped of
    surrounding whitespace and downloaded with the HTTP settings stored on
    the instance. A request that times out is retried until
    ``self.http_timeout_max_tries`` attempts have timed out; the body part is
    then given up and the message is flagged as erroneous (it is not marked
    as seen for that body part). A successful response is split by
    ``generate_reports``, every report is sent and the message is marked as
    seen.

    Raises:
        ValueError: if the HTTP response status is not 2xx.

    The mailbox is logged out even when an error is raised.
    """
    mailbox = imbox.Imbox(self.parameters.mail_host,
                          self.parameters.mail_user,
                          self.parameters.mail_password,
                          self.parameters.mail_ssl)
    try:
        emails = mailbox.messages(folder=self.parameters.folder, unread=True)

        if emails:
            for uid, message in emails:
                # Raw string: the previous non-raw pattern relied on the
                # invalid escape sequence "\s".
                if (self.parameters.subject_regex and
                        not re.search(self.parameters.subject_regex,
                                      re.sub(r"\r\n\s", " ", message.subject))):
                    continue

                erroneous = False  # Set to True when a download was given up.

                for body in message.body['plain']:
                    match = re.search(self.parameters.url_regex, str(body))
                    if not match:
                        continue

                    # strip leading and trailing spaces, newlines and
                    # carriage returns
                    url = match.group().strip()

                    self.logger.info("Downloading report from %r.", url)
                    timeoutretries = 0
                    resp = None
                    while (timeoutretries < self.http_timeout_max_tries and
                           resp is None):
                        try:
                            resp = requests.get(url=url,
                                                auth=self.auth,
                                                proxies=self.proxy,
                                                headers=self.http_header,
                                                verify=self.http_verify_cert,
                                                cert=self.ssl_client_cert,
                                                timeout=self.http_timeout_sec)
                        except requests.exceptions.Timeout:
                            timeoutretries += 1
                            self.logger.warning(
                                "Timeout whilst downloading the report.")

                    if (resp is None and
                            timeoutretries >= self.http_timeout_max_tries):
                        self.logger.error(
                            "Request timed out %i times in a row. ",
                            timeoutretries)
                        erroneous = True
                        # The download timed out too often; give up on this
                        # body part and move on to the next one.
                        continue

                    if resp.status_code // 100 != 2:
                        raise ValueError('HTTP response status code was {}.'
                                         ''.format(resp.status_code))

                    self.logger.info("Report downloaded.")

                    template = self.new_report()
                    for report in generate_reports(template,
                                                   io.BytesIO(resp.content),
                                                   self.chunk_size,
                                                   self.chunk_replicate_header):
                        self.send_message(report)

                    # Only mark read if message relevant to this instance,
                    # so other instances watching this mailbox will still
                    # check it.
                    mailbox.mark_seen(uid)

                if not erroneous:
                    self.logger.info("Email report read.")
                else:
                    self.logger.error(
                        "Email report read with errors, the report was not processed."
                    )
    finally:
        mailbox.logout()
def process(self):
    """Download reports from URLs found in unread mails and send them on.

    Fetches unread messages from ``parameters.folder`` and skips those whose
    subject does not match ``subject_regex`` (when set). Each plain text body
    part is searched with ``url_regex``; a match is stripped of surrounding
    whitespace and downloaded. The HTTP settings (headers, certificate
    verification, basic auth, proxies, user agent) are read from the bot
    parameters and stored on the instance right before the request. The
    response content is sent as a ``Report`` carrying ``raw``, ``feed.name``
    and ``feed.accuracy`` and the message is marked as seen.

    Raises:
        ValueError: if the HTTP response status is not 2xx.

    The mailbox is logged out when processing ends, also on error.
    """
    mailbox = imbox.Imbox(self.parameters.mail_host,
                          self.parameters.mail_user,
                          self.parameters.mail_password,
                          self.parameters.mail_ssl)
    try:
        emails = mailbox.messages(folder=self.parameters.folder, unread=True)

        if emails:
            for uid, message in emails:
                if (self.parameters.subject_regex and
                        not re.search(self.parameters.subject_regex,
                                      message.subject)):
                    continue

                self.logger.info("Reading email report")

                for body in message.body['plain']:
                    match = re.search(self.parameters.url_regex, str(body))
                    if not match:
                        continue

                    # strip leading and trailing spaces, newlines and
                    # carriage returns
                    url = match.group().strip()

                    # Build request. Copy the configured headers so adding
                    # the user agent below does not modify the parameter
                    # object's own dictionary.
                    self.http_header = dict(
                        getattr(self.parameters, 'http_header', {}))
                    self.http_verify_cert = getattr(self.parameters,
                                                    'http_verify_cert', True)

                    if (hasattr(self.parameters, 'http_user') and
                            hasattr(self.parameters, 'http_password')):
                        self.auth = (self.parameters.http_user,
                                     self.parameters.http_password)
                    else:
                        self.auth = None

                    http_proxy = getattr(self.parameters, 'http_proxy', None)
                    https_proxy = getattr(self.parameters, 'http_ssl_proxy',
                                          None)
                    # Proxies are only used when both are configured.
                    if http_proxy and https_proxy:
                        self.proxy = {'http': http_proxy,
                                      'https': https_proxy}
                    else:
                        self.proxy = None

                    self.http_header['User-agent'] = \
                        self.parameters.http_user_agent

                    self.logger.info("Downloading report from %s", url)
                    resp = requests.get(url=url,
                                        auth=self.auth,
                                        proxies=self.proxy,
                                        headers=self.http_header,
                                        verify=self.http_verify_cert)

                    if resp.status_code // 100 != 2:
                        raise ValueError('HTTP response status code was {}.'
                                         ''.format(resp.status_code))

                    self.logger.info("Report downloaded.")

                    report = Report()
                    report.add("raw", resp.content)
                    report.add("feed.name", self.parameters.feed)
                    report.add("feed.accuracy", self.parameters.accuracy)
                    self.send_message(report)

                    # Only mark read if message relevant to this instance,
                    # so other instances watching this mailbox will still
                    # check it.
                    mailbox.mark_seen(uid)
                self.logger.info("Email report read")
    finally:
        mailbox.logout()