Example #1
0
    def fillUp(self):
        """Dump the bodies of the newest 100 mailbox messages to ``Data/<uid>.html``.

        Connects via ``self.connect()``, searches all UIDs on ``self.mail`` and,
        for each of the last 100, writes the chosen body part as UTF-8 text.
        The body is decoded with the default codec first and with
        ``windows-1251`` as a fallback. If anything goes wrong while picking or
        decoding the body, a short notice containing the subject is written
        instead, so one bad message never aborts the whole run.
        """
        self.connect()

        # UID SEARCH returns a single space-separated bytes string of UIDs.
        result, data = self.mail.uid('search', None, "ALL")
        id_list = data[0].split()

        for latest_email_uid in id_list[-100:]:
            result, data = self.mail.uid('fetch', latest_email_uid, '(RFC822)')
            # Raw RFC822 text of the whole email: headers plus all payloads.
            raw_email = data[0][1]
            email_message = BytesParser().parsebytes(raw_email)

            # NOTE: str() of a bytes UID yields "b'123'"; kept unchanged so the
            # file names produced by earlier runs stay valid.
            target_path = "Data/" + str(latest_email_uid) + ".html"

            try:
                # Descend through nested multipart containers, always
                # following the first child.
                message_juice = email_message.get_payload(decode=False)
                while (isinstance(message_juice, list) and isinstance(
                        message_juice[0].get_payload(decode=False), list)):
                    message_juice = message_juice[0].get_payload(decode=False)

                if isinstance(message_juice, list):
                    # Prefer the last part unless it is an attachment.
                    if message_juice[-1].get_filename() is None:
                        chosen_part = message_juice[-1]
                    else:
                        chosen_part = message_juice[0]
                    html_message_juice = chosen_part.get_payload(decode=True)
                else:
                    html_message_juice = email_message.get_payload(decode=True)

                try:
                    html_text = html_message_juice.decode()
                except UnicodeDecodeError:
                    html_text = html_message_juice.decode('windows-1251')

                with open(target_path, "w", encoding="utf8") as ssd:
                    ssd.write(html_text)

            except Exception:
                # Best effort: record that the message could not be processed.
                subject = email_message['Subject']
                subject_text = "" if subject is None else str(subject)
                with open(target_path, "w", encoding="utf8") as ssd:
                    ssd.write("This email could not be processed see what happened \n\nSubject: " + subject_text)
Example #2
0
    def _login_btn_clicked(self):
        """Log in to Gmail over POP3, fetch the newest message and show it.

        Reads the credentials from ``self.input_User`` / ``self.input_Pass``,
        retrieves only the last message of the mailbox, parses it and hands
        the parsed message to ``self.new_window``. Does nothing further when
        the mailbox is empty. The POP3 session is always closed.
        """
        # Kept as module globals because the original published them; the
        # octet count is deliberately NOT stored in a global named ``bytes``,
        # which would shadow the builtin for the whole module.
        global response, headerLines

        username = self.input_User.get()
        password = self.input_Pass.get()

        # Never echo the password; the user name is enough for debugging.
        print(username)

        # Connect to the Gmail POP3 service.
        M = poplib.POP3_SSL('pop.gmail.com')
        try:
            M.user(username)
            M.pass_(password)
            # Number of messages in the mailbox.
            numero = len(M.list()[1])
            if numero == 0:
                return
            # Only the newest message is displayed, so only that one is read.
            response, headerLines, _octets = M.retr(numero)
        finally:
            M.quit()

        # Join the received lines into a single bytes blob and parse it.
        mensaje = b'\n'.join(headerLines)
        p = BytesParser()
        parsed_message = p.parsebytes(mensaje)
        # Open a new window showing the message.
        self.new_window(parsed_message)
def decode_email(msg_str, pos, key_map):
    """Parse a raw message and build its record dict.

    The record starts from ``parse_parts(message, key_map)`` and gains:
    ``'Size'`` (length of ``msg_str``), ``'Attachments'`` (file names of all
    parts that carry one, if any) and either ``'text/plain'`` or, only when no
    plain text was found, ``'text/html'``. ``pos`` is accepted but not used.
    """
    message = BytesParser().parsebytes(msg_str)
    record = parse_parts(message, key_map)
    record['Size'] = len(msg_str)

    attachment_names = []
    text_plain = ''
    text_html = ''
    for section in message.walk():
        text_plain += decode_part(section, 'text/plain')
        if text_plain:
            # Plain text wins: discard any HTML gathered so far.
            text_html = ""
        else:
            text_html += decode_part(section, 'text/html')

        attachment_name = section.get_filename()
        if attachment_name:
            attachment_names.append(attachment_name)

    if attachment_names:
        record['Attachments'] = attachment_names

    if text_plain:
        record['text/plain'] = text_plain
    elif text_html:
        record['text/html'] = text_html
    return record
Example #4
0
class EncryptedMailDir(BitMessageMailDir):
    """Mail directory whose stored messages carry an RSA-encrypted payload.

    Messages are encrypted with the key pair created in :meth:`setup`,
    base64-encoded and stored in the ``new`` folder; reading reverses that.
    """

    def __init__(self, bmAddr: str, mailDirPath: Path):
        super().__init__(bmAddr, mailDirPath)
        self.parser = BytesParser()

    async def getMessageByKey(self, key):
        """Return the decrypted, parsed message stored under ``key``.

        Returns ``None`` (after printing the traceback) when the message
        cannot be loaded, decrypted or parsed.
        """
        # Imported locally, as encryptMessage() does, so this method does not
        # depend on a module-level import of BytesIO being present.
        from io import BytesIO

        try:
            message = self.folderInbox[key]
            payload = base64.b64decode(message.get_payload().encode())

            decoded = await self.rsaExec.decryptData(BytesIO(payload),
                                                     self.privKey)
            msg = self.parser.parsebytes(decoded)
        except Exception:
            traceback.print_exc()
            return None

        await asyncio.sleep(0.1)
        log.debug(f'Decoded message (key: {key})')
        return msg

    async def setup(self):
        """Create the RSA executor and key pair and open the ``new`` folder."""
        from galacteek.crypto.rsa import RSAExecutor
        self.rsaExec = RSAExecutor()

        # TODO: attach external keys (this is temporary)
        self.privKey, self.pubKey = await self.rsaExec.genKeys()

        self.path.mkdir(parents=True, exist_ok=True)
        self.folderInbox = self.maildir.add_folder('new')

    async def encryptMessage(self, message):
        """Return a ``MaildirMessage`` holding ``message`` encrypted and base64-encoded.

        Any encryption error propagates to the caller.
        """
        from io import BytesIO

        body = bytes(message)
        encrypted = await self.rsaExec.encryptData(BytesIO(body),
                                                   self.pubKey)
        payload = base64.b64encode(encrypted).decode()

        eMessage = MaildirMessage()
        eMessage['From'] = '*****@*****.**'
        eMessage.set_payload(payload)
        return eMessage

    async def store(self, message):
        """Encrypt ``message`` and add it to the inbox folder.

        Returns ``True`` on success and ``False`` (after logging the error)
        on any failure.
        """
        log.debug('Storing message in encrypted maildir ..')
        try:
            eMsg = await self.encryptMessage(message)
            if not eMsg:
                raise Exception('Could not encrypt message')
            self.folderInbox.add(eMsg)
        except Exception as err:
            log.debug(str(err))
            return False
        return True
Example #5
0
    def load_email_messages(self, message_set):
        """
        Lazily yield one parsed EmailMessage per raw email that
        load_raw_emails returns for message_set.
        """
        parse = BytesParser(policy=email.policy.default).parsebytes
        yield from (parse(text=raw)
                    for raw in self.load_raw_emails(message_set))
Example #6
0
def fillUp(modeladmin, request, queryset):
    """Import the newest 100 mailbox messages of each selected object as Blog entries.

    For every object in ``queryset`` this connects its mailbox, fetches the
    last 100 UIDs and saves one ``Blog`` per message (title = Subject header,
    body = chosen body part decoded with the default codec, falling back to
    ``windows-1251``). When a message cannot be processed, a placeholder Blog
    naming the subject is saved instead. Each handled UID is reported through
    ``obj.setData``.
    """
    failure_prefix = "This email could not be processed see what happened \n\nSubject: "

    for obj in queryset:
        obj.connect()

        # UID SEARCH returns a single space-separated bytes string of UIDs.
        result, data = obj.mail.uid('search', None, "ALL")
        id_list = data[0].split()

        for latest_email_uid in id_list[-100:]:
            result, data = obj.mail.uid('fetch', latest_email_uid, '(RFC822)')
            # Raw RFC822 text of the whole email: headers plus all payloads.
            raw_email = data[0][1]

            email_message = BytesParser().parsebytes(raw_email)
            subject = email_message['Subject']

            try:
                # Descend through nested multipart containers, always
                # following the first child.
                message_juice = email_message.get_payload(decode=False)
                while (isinstance(message_juice, list) and isinstance(
                        message_juice[0].get_payload(decode=False), list)):
                    message_juice = message_juice[0].get_payload(decode=False)

                if isinstance(message_juice, list):
                    # Prefer the last part unless it is an attachment.
                    if message_juice[-1].get_filename() is None:
                        chosen_part = message_juice[-1]
                    else:
                        chosen_part = message_juice[0]
                    html_message_juice = chosen_part.get_payload(decode=True)
                else:
                    html_message_juice = email_message.get_payload(decode=True)

                # Only a decoding failure triggers the legacy-charset retry;
                # a failing save() is not mistaken for a charset problem.
                try:
                    body_text = html_message_juice.decode()
                except UnicodeDecodeError:
                    body_text = html_message_juice.decode('windows-1251')

                newBlog = Blog(title=subject, body=body_text)
                newBlog.save()

            except Exception:
                # Best effort: keep a placeholder so the message is not lost.
                subject_text = "" if subject is None else str(subject)
                newBlog = Blog(title=subject, body=failure_prefix + subject_text)
                newBlog.save()

            obj.setData(repr(latest_email_uid))
Example #7
0
 def get_content(self, raw):
     """Decode a URL-safe base64 raw email into a dict of headers plus ``'body'``.

     ``'body'`` is the decoded text of the preferred ``text/plain`` part, or
     ``None`` when the message has no plain-text body. Because the headers
     are collected with ``dict()``, repeated header names keep one value.
     """
     data = base64.urlsafe_b64decode(raw)
     email_parser = EmailParser(policy=policy.default)
     message = email_parser.parsebytes(data)
     plain = message.get_body(preferencelist=('plain',))
     body = None
     # Compare against None: a Message's truth value is its header count.
     if plain is not None:
         # get_content() undoes the transfer encoding (base64 /
         # quoted-printable) and the charset; get_payload() would return
         # the still-encoded text.
         body = plain.get_content()
     email_dict = dict(message)
     email_dict['body'] = body
     return email_dict
Example #8
0
 def get_content(self, raw):
     """Decode a URL-safe base64 raw email into a dict of headers plus ``'body'``.

     ``'body'`` is the decoded text of the preferred ``text/plain`` part, or
     ``None`` when the message has no plain-text body. Because the headers
     are collected with ``dict()``, repeated header names keep one value.
     """
     data = base64.urlsafe_b64decode(raw)
     email_parser = EmailParser(policy=policy.default)
     message = email_parser.parsebytes(data)
     plain = message.get_body(preferencelist=('plain', ))
     body = None
     # Compare against None: a Message's truth value is its header count.
     if plain is not None:
         # get_content() undoes the transfer encoding (base64 /
         # quoted-printable) and the charset; get_payload() would return
         # the still-encoded text.
         body = plain.get_content()
     email_dict = dict(message)
     email_dict['body'] = body
     return email_dict
Example #9
0
    def handle(self, *args, **options):
        """Import every message of an IMAP mailbox into ``Message`` models.

        Reads ``imaphost`` / ``imapuser`` from ``options``, prompts for the
        password, then fetches each message of the default mailbox and stores
        it together with its recipients and attachments.

        Raises:
            CommandError: when login, select, search or a fetch does not
                return ``OK``. A failure while converting or saving a single
                message is logged with its traceback and the import continues.
        """
        host = options["imaphost"]
        user = options["imapuser"]
        password = getpass.getpass(f"Password for {user}@{host}: ")

        parser = BytesParser(policy=default)

        # NOTE(review): imaplib.IMAP4 is the non-TLS client class; consider
        # IMAP4_SSL if the server supports it, so the password is not sent
        # in clear text.
        with imaplib.IMAP4(host) as imap:
            status, login_response = imap.login(user, password)
            if status != "OK":
                raise CommandError("Cannot login")
            logger.debug(login_response)

            status, _ = imap.select()
            if status != "OK":
                raise CommandError("Cannot select")

            status, data = imap.search(None, "ALL")
            if status != "OK":
                raise CommandError("Cannot search")

            message_numbers = data[0].split()
            logger.debug(f"Found {len(message_numbers)} messages")

            for message_number in tqdm.tqdm(message_numbers):
                status, data = imap.fetch(message_number, "(RFC822)")
                if status != "OK":
                    raise CommandError(f"Cannot fetch {message_number}")

                content = data[0][1]
                message = parser.parsebytes(content)

                try:
                    # Create models
                    senders = _convert_addresses(message["From"].addresses)
                    recipients = _convert_addresses(message["To"].addresses)
                    subject = message["Subject"]
                    date = message["Date"].datetime
                    attachments = _convert_attachments(message)

                    model = Message(
                        sender=senders[0] if senders else None,
                        subject=subject,
                        date=date,
                        content=content,
                    )
                    # The row must exist before many-to-many links are added.
                    model.save()

                    model.recipients.add(*recipients)
                    model.attachments.add(*attachments)

                    model.save()
                except Exception:
                    # Skip the broken message but keep the traceback, and let
                    # KeyboardInterrupt / SystemExit stop the command.
                    logger.exception(f"Error when saving {message_number}")
Example #10
0
def decode_email(msg_str: bytes) -> str:
    """Return the concatenated text of every ``text/plain`` part of a raw email.

    Each part is decoded with its declared charset. Parts that declare no
    charset, or one Python does not know, are decoded as UTF-8. Undecodable
    bytes are replaced rather than raising.
    """
    message = BytesParser().parsebytes(msg_str)
    chunks = []
    for part in message.walk():
        if part.get_content_type() != "text/plain":
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        # get_content_charset() is None when the header omits the charset;
        # bytes.decode(None) would raise TypeError.
        charset = part.get_content_charset() or "utf-8"
        try:
            chunks.append(payload.decode(charset, errors="replace"))
        except LookupError:
            # Charset name unknown to Python's codec registry.
            chunks.append(payload.decode("utf-8", errors="replace"))
    return "".join(chunks)
Example #11
0
 def add_received_header(self, peer: Tuple[str, int], msg: bytes,
                         channel: PatchedSMTPChannel) -> bytes:
     """Return ``msg`` re-serialised with a ``Received`` header prepended.

     The header records the client greeting and peer address, this server's
     name, daemon name and local address, and the current time.
     """
     # TODO: remove type annotation and cast when BytesParser on Typeshed gains .parsebytes
     message = cast(SaneMessage, None)  # type: SaneMessage
     message = BytesParser(
         _class=SaneMessage,
         policy=_compat32_smtp_policy).parsebytes(msg)  # type: ignore

     received_fields = (
         channel.seen_greeting,
         peer[0],
         peer[1],
         self.server_name,
         self.daemon_name,
         self._localaddr[0],
         self._localaddr[1],
         timezone.now().strftime("%a, %d %b %Y %H:%M:%S %z (%Z)"),
     )
     received_value = (
         "from %s (%s:%s)\r\n\tby %s (%s [%s:%s]) with SMTP;\r\n\t%s" %
         received_fields)

     message.prepend_header("Received", received_value)
     return message.as_bytes()
Example #12
0
    def _format_denied_recipients(self, original_mail: bytes,
                                  recipients: Sequence[str]) -> bytes:
        parser = BytesParser()
        # TODO: fix type annotation when typeshed has better stubs
        msg = cast(Message, None)  # type: Message
        msg = parser.parsebytes(original_mail, True)  # type: ignore
        msg["Subject"] = "[mailforwarder error] Re: %s" % msg["Subject"]
        # this should never be None at this point, but typewise it could be
        msg["To"] = cast(Union[str, Header], msg["From"])
        msg["From"] = "mailforwarder bounce <>"

        rcptlist = ""
        for rcpt in recipients:
            rcptlist = "%s\n%s" % (
                "  * %s" % rcpt,
                rcptlist,
            )
        txt = denied_recipients_template.format(rcptlist=rcptlist)
        msg.set_payload(txt, charset='utf-8')
        return msg.as_bytes(policy=policy.SMTP)
def getMailAttachment(connection, mailID, AttachmentNr):
    """Fetch mail ``mailID`` and return one of its MIME parts.

    AttachmentNr starting with 1 (index 0 is the message body itself).

    Returns:
        ``(attachment, attachmentName, date_of_mail, subject)``. ``attachment``
        and ``attachmentName`` are ``None`` when the requested part does not
        exist, or when the From header is missing or does not contain
        ``'Remotefox'``.
    """
    result, data = connection.fetch(mailID, "(RFC822)")
    raw_email = data[0][1]
    msg = BytesParser().parsebytes(raw_email)
    sender = msg.get('From')
    subject = msg.get('Subject')
    date_of_mail = msg.get('Date')
    mail_as_list = msg.get_payload()
    try:
        attachment = mail_as_list[AttachmentNr]  # 0 is the message itself
        attachmentName = attachment.get_filename()
    except (IndexError, TypeError, AttributeError):
        # IndexError: fewer parts than requested. TypeError/AttributeError:
        # the mail is not multipart, so the payload is not a list of parts.
        attachment = None
        attachmentName = None
        print('Anhang Nr. ', AttachmentNr, ' of mail ', mailID, ' does not exist.')
    # Only attachments from the expected sender are handed out; a mail
    # without a From header is treated as coming from an unexpected sender.
    if sender is None or 'Remotefox' not in str(sender):
        attachment = None
        attachmentName = None
    return attachment, attachmentName, date_of_mail, subject
Example #14
0
def _read_data(request_handler):
    """Read and interpret the request body according to its Content-Type.

    Returns ``None`` for an empty body, the parsed JSON for
    ``application/json``, a list of parsed batch items for
    ``multipart/mixed``, a ``meta`` / ``content`` / ``content-type`` dict for
    any other multipart body, and the decoded raw data otherwise.
    """
    body = _decode_raw_data(_read_raw_data(request_handler), request_handler)
    if not body:
        return None

    content_type = (request_handler.headers["Content-Type"]
                    or "application/octet-stream")

    if content_type.startswith("application/json"):
        return json.loads(body)

    if not content_type.startswith("multipart/"):
        return body

    # Prepend the Content-Type header so the email parser can split the
    # multipart body on its boundary.
    header_line = ("Content-Type:" + content_type + "\r\n").encode("utf-8")
    parts = BytesParser().parsebytes(header_line + body).get_payload()

    if content_type.startswith("multipart/mixed"):
        # Batch https://cloud.google.com/storage/docs/json_api/v1/how-tos/batch
        return [_parse_batch_item(item) for item in parts]

    # For multipart upload, google API expect the first item to be a
    # json-encoded object, and the second (and only other) part, the file
    # content
    return {
        "meta": json.loads(parts[0].get_payload()),
        "content": parts[1].get_payload(decode=True),
        "content-type": parts[1].get_content_type(),
    }
    def parse(self, response):
        """
        Parse the response body as an email and `yield` one Meeting item per
        date found in its text.

        The text is taken from the first text sub-part of each multipart
        part (the last such part wins). The first four-digit number in it is
        used as the year for every "Month day" match.
        """
        message = BytesParser(policy=policy.default).parsebytes(response.body)

        body_text = ""
        for part in message.iter_parts():
            if part.get_content_maintype() != "multipart":
                continue
            for child in part.get_payload():
                if child.get_content_maintype() == "text":
                    body_text = child.get_content()
                    break

        year_str = re.search(r"\d{4}", body_text).group()
        date_strings = re.findall(r"[A-Z][a-z]{2,8} \d{1,2}", body_text)

        for date_str in date_strings:
            start = self._parse_start(date_str, year_str)
            if start:
                meeting = Meeting(
                    title="Board of Trustees",
                    description="",
                    classification=BOARD,
                    start=start,
                    end=None,
                    all_day=False,
                    time_notes="Confirm meeting details with agency",
                    location=self.location,
                    links=[],
                    source=response.url,
                )

                meeting["status"] = self._get_status(meeting)
                meeting["id"] = self._get_id(meeting)

                yield meeting
Example #16
0
class Mail:
    """Spam check for a single e-mail based on its sender and the URLs it contains.

    The constructor parses the raw message, opens a MySQL connection, extracts
    the text body and its URLs and runs the sender whitelist / blacklist
    lookups. :meth:`check_spam` runs the URL checks and logs the result;
    :meth:`get_spam` returns the verdict (``-1`` until a check sets it,
    ``1`` for spam, ``0`` when the sender is whitelisted).

    All SQL statements use bound parameters, because every value they carry
    (sender address, URLs, domains) originates from the untrusted message.
    """

    def __init__(self, mail_data, mysql_creds, threshold, sensitivity, account,
                 logger, mail_id, spam_folder):
        # NOTE(review): the two script patterns use PCRE-style "/.../s"
        # delimiters, which Python's ``re`` treats as literal characters, so
        # they are unlikely to match ordinary HTML. Left unchanged pending the
        # "not working" TODO on extract_javascript().
        self.JS_IMPORT_REGEX = r'/<script.*(?:src="(.*)").*>/s'
        self.JS_EXTRACT_REGEX = r'/<script.*>(.*?)<\/script>/s'
        # Raw string: same pattern as before, without invalid string escapes.
        self.URL_REGEX = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|[^\x00-\x7F]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
        self.parser = BytesParser()
        self.sensitivity = sensitivity
        self.threshold = threshold
        self.log = logger
        self.spam_folder = spam_folder
        self.mysql_db = mysql.connector.connect(
            user=mysql_creds["mysql_username"],
            password=mysql_creds["mysql_password"],
            database=mysql_creds["mysql_database"],
            host=mysql_creds["mysql_host"])
        self.account = account
        self.spam_points = 0
        self.js_code = {}
        self.urls_in_document = []
        self.documents = {}
        self.mail_id = mail_id
        self.whitelisted = False
        self.blacklisted = False
        self.parsed_mail = self.parser.parsebytes(mail_data)
        self.header_data = dict(self.parsed_mail)
        self.message = ""
        self.extract_message()
        self._spam = -1
        self.check_whitelist()
        self.check_blacklisted()
        self.urls = [
            found.strip() for found in re.findall(self.URL_REGEX, self.message)
        ]

    def _sender_address(self):
        """Return the bare address of the first From header, or "" if there is none."""
        addresses = getaddresses(self.parsed_mail.get_all('from', []))
        return addresses[0][1] if addresses else ""

    def _host_already_fetched(self, url):
        """Return True when a document from the same host as ``url`` is stored."""
        host = urlparse(url).hostname
        return any(urlparse(known).hostname == host for known in self.documents)

    def add_domain_to_blacklist(self, url):
        """Insert the IDNA-encoded host of ``url`` into both blacklist tables.

        The inserts are not committed here; log_mail() commits the connection.
        """
        domain = urlparse(url).hostname.encode("idna").decode("utf-8")
        cursor = self.mysql_db.cursor()
        try:
            cursor.execute("INSERT INTO new_blacklists(domain) VALUES(%s)",
                           (domain, ))
            cursor.execute("INSERT INTO domain_blacklist(domain) VALUES(%s)",
                           (domain, ))
        finally:
            cursor.close()

    def _append_payload_text(self, part):
        """Append the UTF-8 decoded payload of ``part`` to ``self.message``.

        Failures are logged and printed, never raised.
        """
        payload = part.get_payload(decode=True)
        try:
            self.message += payload.decode("utf-8")
        except AttributeError as e:
            # get_payload(decode=True) returned None (e.g. a nested multipart).
            self.log.error(
                "AttributeError while trying to get message from mail with id "
                + str(self.mail_id))
            print(e)
        except UnicodeDecodeError as e:
            self.log.error(
                "UnicodeDecodeError while trying to get message from mail with id "
                + str(self.mail_id))
            print(e)

    def extract_message(self):
        """Collect the text of the mail (all top-level parts) into ``self.message``."""
        if self.parsed_mail.is_multipart():
            for part in self.parsed_mail.get_payload():
                self._append_payload_text(part)
        else:
            self._append_payload_text(self.parsed_mail)

    def check_blacklisted(self, url=None):
        """Look up ``url`` or, without one, the mail's sender in the blacklists.

        With ``url``: return True/False depending on whether
        ``domain_blacklist`` has a matching row. Without: set
        ``self.blacklisted`` when the sender address is in ``mail_blacklist``
        and return None.
        """
        cursor = self.mysql_db.cursor()
        try:
            if url is not None:
                url = url.encode("idna").decode("utf-8")
                cursor.execute(
                    "SELECT * FROM domain_blacklist WHERE domain LIKE %s;",
                    (url, ))
                return bool(cursor.fetchall())
            cursor.execute("SELECT * FROM mail_blacklist WHERE mail=%s;",
                           (self._sender_address(), ))
            if cursor.fetchall():
                print("Blacklisted")
                self.blacklisted = True
        finally:
            cursor.close()

    def check_whitelist(self, url=None):
        """Look up ``url`` or, without one, the mail's sender in the whitelists.

        With ``url``: return True/False depending on whether a
        ``domain_whitelist`` row contains it. Without: set
        ``self.whitelisted`` when the sender address is in ``mail_whitelist``
        and return None.
        """
        cursor = self.mysql_db.cursor()
        try:
            if url is not None:
                url = url.encode("idna").decode("utf-8")
                # The wildcards travel inside the bound value.
                cursor.execute(
                    "SELECT * FROM domain_whitelist WHERE domain LIKE %s;",
                    ("%{}%".format(url), ))
                return bool(cursor.fetchall())
            cursor.execute("SELECT * FROM mail_whitelist WHERE mail=%s;",
                           (self._sender_address(), ))
            if cursor.fetchall():
                self.whitelisted = True
        finally:
            cursor.close()

    def check_special_chars(self):
        """Mark the mail as spam when a URL's host contains non-ASCII characters."""
        for url in self.urls:
            parsed = urllib.parse.urlparse(
                url.encode("idna").decode("utf-8").encode("utf-8").decode(
                    "idna"))
            for char in parsed.netloc:
                if char != "." and char.encode("utf-8") != char.encode("idna"):
                    print("Special char detected")
                    self._spam = 1

    def aiPredict(self, data):
        """Run the pickled model in the file ``aiModel`` on the nine URL features in ``data``."""
        # SECURITY: pickle.load executes arbitrary code from the file; the
        # "aiModel" file must come from a trusted source only.
        with open("aiModel", "rb") as m:
            aiModel = pickle.load(m)
            ai_in = (data["dir_num"], data["index_num"], data["length"],
                     data["out_resources"], data["robots_entries"],
                     data["special_char_num"], data["subdomain_len"],
                     data["subdomain_num"], data["tld_trust"])
            return aiModel.predict(numpy.reshape(ai_in, (1, 9)))

    def find_list_resources(self, tag, attribute, soup):
        """Return ``attribute`` of every ``tag`` element in ``soup`` that has it."""
        values = []
        for element in soup.findAll(tag):
            try:
                values.append(element[attribute])
            except KeyError:
                pass
        return values

    def get_url_data(self, url, yandex, timeout=30):
        """Collect the feature dict for ``url`` that aiPredict() consumes.

        Performs a Yandex ``site:`` search and two HTTP requests
        (``robots.txt`` and the URL itself); HTTP failures are printed and
        reflected as ``robots_entries == 0`` / ``out_resources == -1``.
        """
        data = {}
        data["length"] = len(url.split("://")[1].split("?")[0])
        # NOTE(review): str.find returns the index of the first "/", not a
        # count of path segments; kept as is because the stored model
        # presumably was trained on this value. TODO confirm.
        data["dir_num"] = url.find("/") - 2
        parsed = urlparse(
            url.encode("idna").decode("utf-8").encode("utf-8").decode("idna"))
        hostname_split = parsed.hostname.split(".")
        data["tld_trust"] = int(
            hostname_split[-1].lower() in ["com", "org", "net"])
        data["subdomain_num"] = len(hostname_split) - 2
        data["subdomain_len"] = len("".join(hostname_split[:-2]))
        data["special_char_num"] = sum(
            1 for char in parsed.hostname
            if char != "." and char.encode("utf-8") != char.encode("idna"))

        # Advanced data extraction
        try:
            data["index_num"] = int(
                yandex.search("site:{}".format(parsed.hostname)).found["all"])
        except yandex_search.NoResultsException:
            data["index_num"] = 0

        # NOTE(review): verify=False disables TLS certificate validation for
        # both requests below.
        robot_entry_counter = 0
        try:
            response = requests.get("{}://{}/robots.txt".format(
                parsed.scheme, parsed.netloc),
                                    allow_redirects=True,
                                    verify=False,
                                    timeout=timeout)
            if response.status_code == 200:
                lines = [x for x in response.text.split("\n") if x != ""]
                robot_entry_counter += len([x for x in lines if x[0] != "#"])
        except Exception as e:
            print(e)
        data["robots_entries"] = robot_entry_counter

        try:
            req = requests.get(url, verify=False, timeout=timeout)
            if req.status_code == 200:
                soup = BeautifulSoup(req.text, 'html.parser')
                image_scr = self.find_list_resources('img', "src", soup)
                script_src = self.find_list_resources('script', "src", soup)
                css_link = self.find_list_resources("link", "href", soup)
                all_links = image_scr + css_link + script_src
                # Resources served from a host other than the page's own.
                out_links = [
                    link for link in all_links
                    if urlparse(link).hostname != parsed.hostname
                ]
                data["out_resources"] = len(out_links)
            else:
                data["out_resources"] = -1
        except Exception as e:
            print(e)
            data["out_resources"] = -1
        return data

    def check_url(self, url):
        """Score ``url`` with the model; on a positive result blacklist its domain and add spam points."""
        # NOTE(review): credentials are hard-coded here; they should be moved
        # to configuration and the key rotated.
        yandex = yandex_search.Yandex(
            api_user='******',
            api_key='03.1042294429:b8e679f9acadef49ebab0d9726ccef58')
        data = self.get_url_data(url, yandex, timeout=10)
        if self.aiPredict(data):
            self.add_domain_to_blacklist(url)
            self.spam_points += self.sensitivity

    def check_js(self):
        """Check every collected script: blacklist lookup for its page URL, then code analysis."""
        for url in self.js_code:
            for js in self.js_code[url]:
                # ``js`` is a script body taken from the list stored under
                # ``url``; it is neither a URL nor a key of self.js_code.
                if self.check_blacklisted(url=url):
                    self._spam = 1
                if self.check_js_code(js):
                    self.add_domain_to_blacklist(url)
                    self.spam_points += self.sensitivity

    def check_disallowed_chars(self, url_start, chars=("<", ">", "'", "\"")):
        """Return True when ``url_start`` contains any of ``chars``, raw or URL-encoded once or twice."""
        url = url_start
        for char in chars:
            encoded_once = urllib.parse.quote_plus(char)
            encoded_twice = urllib.parse.quote_plus(encoded_once)
            if char in url or encoded_once in url or encoded_twice in url:
                return True
        return False

    def check_tld(self, url):
        """Mark the mail as spam when the URL's top-level domain is info, tk or gq."""
        if urlparse(url).hostname.split(".")[-1] in ["info", "tk", "gq"]:
            self._spam = 1

    def check_domain_name(self, url):
        pass  # TODO fill this up

    def discover_url(self, urls):
        """Run all URL checks on ``urls`` and follow redirects and cross-host links.

        A URL is skipped when a document from the same host was already
        fetched. A whitelisted URL ends its own processing; a blacklisted URL
        marks the mail as spam and stops the current call. Fetched documents
        are stored in ``self.documents`` and the links they contain are
        discovered recursively.
        """
        for url in urls:
            if self._host_already_fetched(url):
                continue
            while True:
                if self.check_whitelist(url=url):
                    break
                if self.check_blacklisted(url=url):
                    self._spam = 1
                    return
                self.log.info(url)
                self.keyword_search(url)
                self.check_xss(url)
                self.check_url(url)
                self.check_tld(url)
                self.check_domain_name(url)
                try:
                    r = requests.get(url, allow_redirects=False)
                except requests.exceptions.ConnectionError:
                    break
                try:
                    self.documents[url] = r.content.decode()
                    # The link gets its own name: the previous code reused the
                    # loop variable for the documents scan, so the host test
                    # always succeeded and no link was ever followed.
                    for link in self.extract_urls(self.documents[url]):
                        if self._host_already_fetched(link):
                            continue
                        if any(marker in link for marker in SKIP):
                            continue
                        self.discover_url([link.strip()])
                except UnicodeDecodeError:
                    pass
                # Follow 302/303 redirects manually.
                if r.status_code == 302 or r.status_code == 303:
                    location = r.headers["location"]
                    if location.startswith("http"):
                        url = location
                    else:
                        url = "/".join(url.split("/")[:-1]) + location
                    continue
                break

    def keyword_search(self, url):
        """Mark the mail as spam when ``url`` contains a listed keyword but is on a different domain."""
        keywords = self.mysql_db.cursor()
        try:
            keywords.execute("SELECT * FROM keywordlist;")
            result = keywords.fetchall()
        finally:
            keywords.close()
        for row in result:
            if row[0] in url:
                if ".".join(urlparse(url).hostname.split(".")[-2:]) != row[1]:
                    self._spam = 1

    def check_xss(self, url):
        """Add spam points when ``url`` contains characters typical for XSS payloads."""
        malicious = self.check_disallowed_chars(url)
        if malicious:
            print("ADDED SPAM POINTS")
            self.spam_points += self.sensitivity

    #TODO add deobfuscation
    def extract_javascript(self):  #TODO not working
        """Fill ``self.js_code`` with the external and inline scripts of each document."""
        # Compile both patterns once; the previous code rebound the import
        # pattern inside the loop and scanned the URL key, not the document.
        import_pattern = re.compile(self.JS_IMPORT_REGEX)
        inline_pattern = re.compile(self.JS_EXTRACT_REGEX)
        for doc, html in self.documents.items():
            scripts = []
            self.js_code[doc] = scripts
            for url in import_pattern.findall(html):
                r = requests.get(url, allow_redirects=False)
                if r.status_code == 200:
                    scripts.append(r.content)
            scripts.extend(js for js in inline_pattern.findall(html)
                           if js != "")

    def extract_urls(
        self, doc
    ):  # Extract URLs from the given document text and return them
        return re.compile(self.URL_REGEX).findall(doc)

    def check_stored_xss(self):
        """Analyse the scripts of every fetched document and add spam points for suspicious ones."""
        self.extract_javascript()
        for url in self.js_code:
            url_spam_count = 0
            for js in self.js_code[url]:
                if url_spam_count > 6:
                    self.add_domain_to_blacklist(url)
                if self.check_blacklisted(url=url):
                    self._spam = 1
                    return
                if self.check_js_code(js):
                    url_spam_count += 3
                    self.spam_points += self.sensitivity

    def check_js_code(self, code):
        """Parse ``code`` with jsParser and return the AI prediction for it."""
        parsedJs = jsParser.parseJavascript(code, False)
        return AI.aiPredict(parsedJs)

    def log_mail(self):
        """Write the verdict for this mail to the ``logs`` table and commit."""
        cursor = self.mysql_db.cursor()
        try:
            # Values are stringified to match what the former string-built
            # statement stored.
            cursor.execute(
                "INSERT INTO logs (sender_domain,result, account, mail_id) VALUES (%s,%s,%s,%s)",
                (str(self._sender_address()), str(self.get_spam()),
                 str(self.account), str(self.mail_id)))
            self.mysql_db.commit()
        finally:
            cursor.close()

    def check_spam(self):
        """Decide whether the mail is spam, notify the user if so, and log the result."""
        if self.whitelisted:
            self._spam = 0
            self.log_mail()
            return
        elif self.blacklisted:
            self._spam = 1
        else:
            self.discover_url(self.urls)

            self.check_stored_xss()
            if self.threshold < self.spam_points:
                self._spam = 1
        if self.get_spam() == 1:
            self.log.info("Mail moved to spam with id: " + str(self.mail_id))
            title = 'Found spam mail by: ' + self._sender_address()
            if platform.system() == "Windows":
                notification.notify(
                    title=title,
                    message=self.account,
                    app_icon=None,
                    timeout=10,
                )
            else:
                Notification(
                    title=title,
                    description=self.account,
                    duration=10,
                    urgency=Notification.URGENCY_CRITICAL).send()
        self.log_mail()

    def get_spam(self) -> int:
        """Return 0 for a whitelisted sender, otherwise the current verdict."""
        if self.whitelisted:
            return 0
        return self._spam
class EmailMessage(object):
    '''
        Email Message.

        Messages should be converted to EmailMessage as soon as possible,
        to check whether the message is parsable as part of validating input.

        If a MIME message is not parsable, a new Message will be created that does conform
        and contains the original unparsable message in the body.
    '''

    DEBUGGING = False

    def __init__(self, message_or_file=None):
        '''
             Creates an EmailMessage from a Message, an EmailMessage, a file, or raw content.

             With no argument an empty Message is created. A Message, or the
             Message held by another EmailMessage, is adopted as is, without
             validation. A file-like object is parsed with the parser's
             parse() and anything else with parsestr(); when the text parser
             raises TypeError the input is parsed again with a BytesParser,
             which then stays in self.parser.

             If the parsed message does not validate, or parsing raises, a
             replacement is built with _create_good_message_from_bad().
             Errors are handed to record_exception() and do not propagate
             out of the constructor.

             >>> email_message = EmailMessage()
             >>> type(email_message)
             <class 'goodcrypto.mail.message.email_message.EmailMessage'>
        '''

        self.bad_header_lines = []
        self.parser = Parser()

        self._last_charset = constants.DEFAULT_CHAR_SET
        self._log = self._message = None

        if message_or_file is None:
            self._message = Message()

        elif isinstance(message_or_file, Message):
            self._message = message_or_file

        elif isinstance(message_or_file, EmailMessage):
            self._message = message_or_file.get_message()

        else:
            try:
                if isinstance(message_or_file, (IOBase, StringIO)):
                    self.log_message('about to parse a message from a file')
                    try:
                        self._message = self.parser.parse(message_or_file)
                        self.log_message('parsed message from file')
                    except TypeError:
                        # the text parser could not read the file; rewind and parse it as bytes
                        message_or_file.seek(0, os.SEEK_SET)
                        self.parser = BytesParser()
                        self._message = self.parser.parse(message_or_file)
                        self.log_message('parsed message from file as bytes')
                else:
                    try:
                        self.log_message('about to parse a message from a string')
                        self._message = self.parser.parsestr(message_or_file)
                        self.log_message('parsed message from string')
                    except TypeError:
                        # not a str, so treat the input as bytes
                        self.parser = BytesParser()
                        self._message = self.parser.parsebytes(message_or_file)
                        self.log_message('parsed message from bytes')

                if not self.validate_message():
                    self._create_good_message_from_bad(message_or_file)
            except Exception:
                try:
                    self.log_message('EXCEPTION - see syr.exception.log for details')
                    record_exception()

                    self._create_good_message_from_bad(message_or_file)

                    # if we still don't have a good message, then report it.
                    # NOTE: the MessageException is caught by the handler just
                    # below, so it is recorded rather than propagated.
                    if not self.validate_message():
                        self.log_message('unable to create a valid message')
                        raise MessageException()
                except Exception:
                    record_exception()

        if self.DEBUGGING:
            # the debug dump is best effort, but must not trap
            # KeyboardInterrupt or SystemExit
            try:
                self.log_message(self.to_string())
            except Exception:
                pass


    def get_header(self, key):
        '''
            Get a header from an existing message.

            >>> from goodcrypto_tests.mail.message_utils import get_encrypted_message_name
            >>> with open(get_encrypted_message_name('basic.txt')) as input_file:
            ...     email_message = EmailMessage(input_file)
            ...     crypto_software = email_message.get_header(constants.ACCEPTED_CRYPTO_SOFTWARE_HEADER)
            >>> crypto_software == 'GPG'
            True
        '''

        try:
            value = self.get_message().__getitem__(key)
        except Exception:
            value = None

        return value


    def add_header(self, key, value):
        '''
            Add a header to an existing message.

            >>> from goodcrypto_tests.mail.message_utils import get_plain_message_name
            >>> with open(get_plain_message_name('basic.txt')) as input_file:
            ...     email_message = EmailMessage(input_file)
            ...     email_message.add_header(constants.ACCEPTED_CRYPTO_SOFTWARE_HEADER, 'GPG')
            ...     crypto_software = email_message.get_header(constants.ACCEPTED_CRYPTO_SOFTWARE_HEADER)
            >>> crypto_software == 'GPG'
            True
        '''

        self._message.__setitem__(key, value)


    def change_header(self, key, value):
        '''
            Change a header to an existing message.

            >>> from goodcrypto_tests.mail.message_utils import get_encrypted_message_name
            >>> with open(get_encrypted_message_name('bouncy-castle.txt')) as input_file:
            ...     email_message = EmailMessage(input_file)
            ...     email_message.change_header(constants.ACCEPTED_CRYPTO_SOFTWARE_HEADER, 'TestGPG')
            ...     crypto_software = email_message.get_header(constants.ACCEPTED_CRYPTO_SOFTWARE_HEADER)
            >>> crypto_software == 'TestGPG'
            True
        '''

        if key in self._message:
            self._message.replace_header(key, value)
        else:
            self.add_header(key, value)


    def delete_header(self, key):
        '''
            Delete a header to an existing message.

            >>> from goodcrypto_tests.mail.message_utils import get_encrypted_message_name
            >>> with open(get_encrypted_message_name('bouncy-castle.txt')) as input_file:
            ...     email_message = EmailMessage(input_file)
            ...     email_message.delete_header(constants.ACCEPTED_CRYPTO_SOFTWARE_HEADER)
            ...     email_message.get_header(constants.ACCEPTED_CRYPTO_SOFTWARE_HEADER) is None
            True
        '''

        self._message.__delitem__(key)


    def get_message(self):
        '''
            Get the message.

            >>> from goodcrypto_tests.mail.message_utils import get_basic_email_message
            >>> from goodcrypto.oce.test_constants import EDWARD_LOCAL_USER
            >>> email_message = get_basic_email_message()
            >>> email_message.get_message() is not None
            True
            >>> email_message.get_message().get(mime_constants.FROM_KEYWORD) == EDWARD_LOCAL_USER
            True
        '''

        return self._message


    def set_message(self, new_message):
        '''
            Replace the wrapped message.

            A string is parsed with the parser currently held in self.parser
            (as bytes when that parser is not a text Parser); any other value
            is stored as given. If parsing fails, or the resulting message
            does not pass validate_message(), the previous message is
            restored.

            # Get a basic message first so we can avoid recursion
            >>> from goodcrypto_tests.mail.message_utils import get_basic_email_message
            >>> from goodcrypto.oce.test_constants import EDWARD_LOCAL_USER
            >>> basic_email_message = get_basic_email_message().get_message()
            >>> email_message = EmailMessage()
            >>> email_message.get_message().get(mime_constants.FROM_KEYWORD) is None
            True
            >>> email_message.set_message(basic_email_message)
            >>> email_message.get_message().get(mime_constants.FROM_KEYWORD) == EDWARD_LOCAL_USER
            True
        '''

        old_message = self._message

        if is_string(new_message):
            try:
                if isinstance(self.parser, Parser):
                    self._message = self.parser.parsestr(new_message)
                else:
                    self._message = self.parser.parsebytes(new_message.encode())
            except Exception:
                # a parse failure keeps the old message; KeyboardInterrupt and
                # SystemExit are deliberately not trapped here
                self._message = old_message
                record_exception()
        else:
            self._message = new_message

        # restore the old message if the new one isn't valid.
        if not self.validate_message():
            self._message = old_message
            self.log_message('restored previous message')

    def validate_message(self):
        '''
            Validate a message.

            Python's parser frequently accepts a message that has garbage in the header by
            simply adding all header items after the bad header line(s) to the body text;
            this can leave a pretty unmanageable message so we apply our own validation.

            Returns True when the Validator accepts the message. Returns False
            when it rejects it (the reason is logged) or when validation
            itself raises (the exception is recorded).

            >>> from goodcrypto_tests.mail.message_utils import get_basic_email_message
            >>> from goodcrypto.oce.test_constants import EDWARD_LOCAL_USER
            >>> email_message = get_basic_email_message()
            >>> email_message.validate_message()
            True
        '''
        try:
            validator = Validator(self)
            if validator.is_message_valid():
                valid = True
                self.log_message('message is valid')
            else:
                valid = False
                self.log_message('message is invalid')
                self.log_message(validator.get_why())
        except Exception:
            # the handler no longer binds the exception to the name of the
            # AttributeError builtin
            valid = False
            record_exception()

        return valid

    def get_text(self):
        '''
            Return the text of the current Message, or None when there is none.

            OpenPGP MIME messages are not handled and give None. For a
            multipart message the decoded payload of the first text/plain
            part is returned; for any other message the decoded payload of
            the message itself.

            >>> from goodcrypto_tests.mail.message_utils import get_basic_email_message
            >>> email_message = get_basic_email_message()
            >>> text = email_message.get_text()
            >>> text == 'Test message text'
            True
        '''

        message = self.get_message()

        if is_open_pgp_mime(message):
            self.log_message("unable to get text from openpgp mime message")
            return None

        if not message.is_multipart():
            plain_text = self._get_decoded_payload(message)
            self.log_message("payload is a: {}".format(type(plain_text)))
            return plain_text

        self.log_message("message is a MIMEMultipart")

        #  the first text/plain part wins; parts before it are only logged
        for part in message.get_payload():
            content_type = part.get_content_type()
            if content_type == mime_constants.TEXT_PLAIN_TYPE:
                return self._get_decoded_payload(part)
            self.log_message("body part type is " + content_type)

        return None


    def set_text(self, text, charset=None):
        '''
            Sets text in the current Message.

            If the message is multipart, the text replaces the payload of the
            first text/plain part; when there is no such part a new
            text/plain part is attached. A non-multipart OpenPGP MIME message
            is left alone. Any other message gets the text through
            set_content().

            charset is used for a newly attached part and for set_content();
            when it is None the charset is worked out with get_charset().

            Returns True if the text was set, False otherwise.

            >>> from goodcrypto_tests.mail.message_utils import get_basic_email_message
            >>> email_message = get_basic_email_message()
            >>> email_message.set_text('New test message text')
            True
            >>> text = email_message.get_text()
            >>> text == 'New test message text'
            True
        '''

        if self.DEBUGGING: self.log_message("setting text:\n{}".format(text))

        text_set = False
        message = self.get_message()
        # NOTE(review): get_text() tests is_open_pgp_mime() before
        # is_multipart(); here the order is reversed, so a multipart OpenPGP
        # message takes the multipart branch. Confirm that is intended.
        if message.is_multipart():
            #  set the first text/plain part
            for part in message.get_payload():
                content_type = part.get_content_type()
                if content_type == mime_constants.TEXT_PLAIN_TYPE:
                    part.set_payload(text)
                    text_set = True
                    self.log_message('the first text/plain part found')
                    break
                self.log_message('body part type is {}'.format(content_type))

            if not text_set:
                # only work out a charset when the caller did not supply one
                if charset is None:
                    charset, __ = get_charset(self._message, self._last_charset)
                self.log_message('no text_set char set: {}'.format(charset))
                new_part = MIMEText(text, mime_constants.PLAIN_SUB_TYPE, charset)
                message.attach(new_part)
                text_set = True
                self.log_message('added a new text/plain part with text')

        elif is_open_pgp_mime(message):
            self.log_message("unable to set text from openpgp mime message")

        else:
            self.set_content(text, mime_constants.TEXT_PLAIN_TYPE, charset=charset)
            text_set = True

        if self.DEBUGGING:
            self.log_message("message after setting text:\n" + self.to_string())
            self.log_message("set text:\n{}".format(text_set))

        return text_set


    def get_content(self):
        '''
            Get the message's content, decoding if base64 or quoted-printable encoded.

            The payload is decoded only when the Content-Transfer-Encoding
            header names one of those two encodings and the content type is
            not multipart. If fetching the decoded payload raises, the
            exception is recorded and the message's raw payload is returned
            instead.

            >>> from goodcrypto_tests.mail.message_utils import get_basic_email_message
            >>> email_message = get_basic_email_message()
            >>> text = email_message.get_content()
            >>> text == 'Test message text'
            True
        '''

        decode = False
        msg = self.get_message()
        encoding = self.get_header(mime_constants.CONTENT_XFER_ENCODING_KEYWORD)
        if encoding is not None:
            encoding = encoding.lower()
            self.log_message('payloaded encoded with {}'.format(encoding))

            # only use the encoding if it's not a multipart message
            if (encoding == mime_constants.QUOTED_PRINTABLE_ENCODING or
                encoding == mime_constants.BASE64_ENCODING):
                current_content_type = msg.get_content_type()
                if (current_content_type is not None and
                    current_content_type.lower().find(mime_constants.MULTIPART_PRIMARY_TYPE) < 0):
                    decode = True
                    self.log_message('decoding payload with {}'.format(encoding))

        try:
            payload = self._get_decoded_payload(msg, decode=decode)
            if self.DEBUGGING: self.log_message('decoded payloaded:\n{}'.format(payload))
            self.log_message('type of payload: {}'.format(type(payload)))
        except Exception:
            record_exception()
            # fall back to the raw payload; this used to reference an
            # undefined name and raised NameError instead
            payload = msg.get_payload()

        return payload

    def set_content(self, payload, content_type, charset=None):
        '''
            Set the content of the message.

            payload is the new content; for the octet-stream content type it
            is instead the path of a file, which is read and attached as a
            base64 attachment named after the file.
            content_type is the MIME type of the payload.
            charset is the character set for the payload; when None it is
            taken from get_charset().

            A text/plain or text/html message whose type already equals
            content_type simply gets its payload replaced. Otherwise the
            payload is attached as a new part of a multipart message, or
            replaces the payload of a non-multipart MIME message. If a
            Content-Transfer-Encoding header is missing, one is added.
            A message that matches none of these cases is left unchanged.

            >>> from goodcrypto_tests.mail.message_utils import get_basic_email_message
            >>> email_message = get_basic_email_message()
            >>> email_message.set_content('New test message text', mime_constants.TEXT_PLAIN_TYPE)
            >>> text = email_message.get_content()
            >>> text == 'New test message text'
            True
        '''

        # create a new message if one doesn't exist
        if self._message is None:
            self._message = Message()

        message = self.get_message()

        current_content_type = message.get_content_type()
        if current_content_type is None:
            current_content_type = content_type
        self.log_message('current content type: {}'.format(current_content_type))
        self.log_message('setting content type: {}'.format(content_type))
        if self.DEBUGGING: self.log_message('content:\n{}'.format(payload))

        # current_encoding stays None when the header was missing, so the
        # payload is not re-encoded in that case
        current_encoding = self.get_header(mime_constants.CONTENT_XFER_ENCODING_KEYWORD)
        if current_encoding is None:
            self._message[mime_constants.CONTENT_XFER_ENCODING_KEYWORD] = mime_constants.BITS_8
            self.log_message('setting content encoding: {}'.format(mime_constants.BITS_8))

        # if this is a simple text or html message, then just update the payload
        if (content_type == current_content_type and
            (content_type == mime_constants.TEXT_PLAIN_TYPE or
             content_type == mime_constants.TEXT_HTML_TYPE)):

            if charset is None:
                charset, self._last_charset = get_charset(payload, self._last_charset)
                self.log_message('getting charset from payload: {}'.format(charset))
            elif self._last_charset is None:
                self._last_charset = constants.DEFAULT_CHAR_SET
                # the format call used to have no argument and raised IndexError
                self.log_message(
                    'setting last charset to default: {}'.format(self._last_charset))
            else:
                self.log_message('using preset charset: {}'.format(charset))

            try:
                message.set_payload(
                   self.encode_payload(payload, current_encoding), charset=charset)
                self.log_message('set payload with {} charset'.format(charset))
                if self.DEBUGGING: self.log_message('payload set:\n{}'.format(payload))
            except UnicodeEncodeError as error:
                # the payload does not fit the charset; store it without one
                self.log_message(error.reason)
                self.log_message('start: {} end: {}'.format(error.start, error.end))
                self.log_message('object: {}'.format(error.object))
                message.set_payload(self.encode_payload(payload, current_encoding))
                self.log_message('setting payload without charset')
            message.set_type(content_type)

        else:
            from goodcrypto.mail.message.inspect_utils import is_content_type_mime

            self.log_message('attaching payload for {}'.format(content_type))
            if content_type == mime_constants.OCTET_STREAM_TYPE:
                part = MIMEBase(mime_constants.APPLICATION_TYPE, mime_constants.OCTET_STREAM_SUB_TYPE)
                # payload is a file path here; close the file once it is read
                with open(payload, 'rb') as attachment:
                    part.set_payload(attachment.read())
                encode_base64(part)
                part.add_header('Content-Disposition', 'attachment; filename="%s"' % os.path.basename(payload))
                message.attach(part)

            elif is_content_type_mime(message):
                if not message.is_multipart():
                    if charset is None:
                        charset, self._last_charset = get_charset(payload, self._last_charset)
                        self.log_message('setting content with char set: {}'.format(charset))
                    else:
                        if self._last_charset is None:
                            self._last_charset = constants.DEFAULT_CHAR_SET
                    message.set_payload(self.encode_payload(payload, current_encoding), charset)
                    self.log_message('set payload with {} charset'.format(charset))
                    message.set_type(content_type)

                elif content_type == mime_constants.TEXT_PLAIN_TYPE:
                    if self.DEBUGGING: self.log_message('mime text payload:\n{}'.format(payload))
                    part = MIMEText(payload)
                    if self.DEBUGGING: self.log_message('mime text part:\n{}'.format(part))
                    part.set_payload(self.encode_payload(payload, current_encoding))
                    if self.DEBUGGING: self.log_message('mime text part with payload:\n{}'.format(part))
                    message.attach(part)

                else:
                    primary, __, secondary = content_type.partition(mime_constants.PRIMARY_TYPE_DELIMITER)
                    part = MIMEBase(primary, secondary)
                    part.set_payload(self.encode_payload(payload, current_encoding))
                    message.attach(part)

    def encode_payload(self, payload, current_encoding):
        '''
            Encode the payload.

            Test extreme case.
            >>> email_message = EmailMessage()
            >>> email_message.encode_payload(None, None)
        '''
        new_payload = payload
        if payload is not None and current_encoding is not None:
            """
            """
            if current_encoding == mime_constants.BASE64_ENCODING:
                if isinstance(payload, str):
                    payload = payload.encode()
                new_payload = b64encode(payload)
                self.log_message('encoding payload with {}'.format(current_encoding))
            elif current_encoding == mime_constants.QUOTED_PRINTABLE_ENCODING:
                if isinstance(payload, str):
                    payload = payload.encode()
                new_payload = encodestring(payload)
                self.log_message('encoding payload with {}'.format(current_encoding))
        return new_payload

    def is_probably_pgp(self):
        '''
            Returns true if this is probably an OpenPGP message.

            A message counts as OpenPGP when it is OpenPGP MIME, or when its
            content (or, for a list of parts, any string part) contains the
            PGP message delimiters.

            >>> from goodcrypto_tests.mail.message_utils import get_encrypted_message_name
            >>> with open(get_encrypted_message_name('open-pgp-mime.txt')) as input_file:
            ...     mime_message = EmailMessage(input_file)
            ...     mime_message.is_probably_pgp()
            True
        '''

        is_pgp = is_open_pgp_mime(self.get_message())
        if is_pgp:
            return is_pgp

        content = self.get_content()
        if is_string(content):
            is_pgp = self.contains_pgp_message_delimters(content)
            self.log_message('message uses in line pgp: {}'.format(is_pgp))

        elif not isinstance(content, list):
            self.log_message('content type is: {}'.format(type(content)))

        else:
            # look for in line pgp in each part; the first hit ends the search
            for part in content:
                part_content = part.get_payload() if isinstance(part, Message) else part

                if not is_string(part_content):
                    self.log_message('part of content type is: {}'.format(repr(part_content)))
                    continue

                is_pgp = self.contains_pgp_message_delimters(part_content)
                if is_pgp:
                    self.log_message('part of message uses in line pgp: {}'.format(is_pgp))
                    break

        return is_pgp

    def contains_pgp_message_delimters(self, text):
        '''
            Returns true if text contains PGP message delimiters.

            >>> from goodcrypto_tests.mail.message_utils import get_encrypted_message_name
            >>> with open(get_encrypted_message_name('open-pgp-mime.txt')) as input_file:
            ...     text = input_file.read()
            ...     email_message = EmailMessage()
            ...     email_message.contains_pgp_message_delimters(text)
            True
        '''

        return (isinstance(text, str) and
                text.find(oce_constants.BEGIN_PGP_MESSAGE) >= 0 and
                text.find(oce_constants.END_PGP_MESSAGE) >= 0)

    def contains_pgp_signature_delimeters(self, text):
        '''
            Returns true if text contains PGP signature delimiters.

            >>> from goodcrypto_tests.mail.message_utils import get_plain_message_name
            >>> with open(get_plain_message_name('pgp-signature.txt')) as input_file:
            ...     text = input_file.read()
            ...     email_message = EmailMessage()
            ...     email_message.contains_pgp_signature_delimeters(text)
            True
        '''

        return (isinstance(text, str) and
                text.find(oce_constants.BEGIN_PGP_SIGNATURE) >= 0 and
                text.find(oce_constants.END_PGP_SIGNATURE) >= 0)

    def get_pgp_signature_blocks(self):
        '''
            Returns the PGP signature blocks with text, if there are any.

            For a multipart message the payload of every part is checked;
            otherwise the decoded payload of the message itself. Each block
            runs from the signed-message marker (or, without one, from the
            signature marker) to the end of the signature. The result is a
            list, empty when nothing is signed.

            >>> from goodcrypto_tests.mail.message_utils import get_plain_message_name
            >>> with open(get_plain_message_name('pgp-signature.txt')) as input_file:
            ...     mime_message = EmailMessage(input_file)
            ...     signature_blocks = mime_message.get_pgp_signature_blocks()
            ...     len(signature_blocks) > 0
            True
        '''

        def get_signed_data(content):
            ''' Get the signed data, or None if the markers are out of order. '''

            signature_block = None
            start_index = content.find(oce_constants.BEGIN_PGP_SIGNED_MESSAGE)
            if start_index < 0:
                start_index = content.find(oce_constants.BEGIN_PGP_SIGNATURE)
            end_index = content.find(oce_constants.END_PGP_SIGNATURE)
            if start_index >= 0 and end_index > start_index:
                signature_block = content[start_index:end_index + len(oce_constants.END_PGP_SIGNATURE)]

            return signature_block

        signature_blocks = []
        message = self.get_message()
        if message.is_multipart():
            parts = message.get_payload()
            self.log_message('check each of {} parts of message for a signature'.format(
                len(parts)))
            # number parts from 1 for the log; the counter used to be bumped
            # twice per part, so the logged numbers were wrong
            for part_index, part in enumerate(parts, start=1):
                if isinstance(part, str):
                    content = part
                else:
                    content = part.get_payload()
                if self.contains_pgp_signature_delimeters(content):
                    signature_block = get_signed_data(content)
                    if signature_block is not None:
                        signature_blocks.append(signature_block)
                    self.log_message('found signature block in part {}'.format(part_index))

        else:
            content = self._get_decoded_payload(message)
            if isinstance(content, str) and self.contains_pgp_signature_delimeters(content):
                signature_block = get_signed_data(content)
                if signature_block is not None:
                    signature_blocks.append(signature_block)
                    self.log_message('found signature block in content')

        self.log_message('total signature blocks: {}'.format(len(signature_blocks)))

        return signature_blocks

    def remove_pgp_signature_blocks(self):
        '''
            Remove the PGP signature blocks, if there are any.

            For a multipart message every part whose decoded payload holds
            the signature delimiters gets its payload replaced by the
            unsigned text; otherwise the same is done for the message itself.
            Errors are logged and recorded, not raised.

            >>> from goodcrypto_tests.mail.message_utils import get_plain_message_name
            >>> with open(get_plain_message_name('pgp-signature.txt')) as input_file:
            ...     mime_message = EmailMessage(input_file)
            ...     mime_message.remove_pgp_signature_blocks()
            ...     signature_blocks = mime_message.get_pgp_signature_blocks()
            ...     len(signature_blocks) == 0
            True
        '''

        def remove_signature(content):
            ''' Remove the signature from the content. '''

            # remove the beginning signature lines
            if content.startswith(oce_constants.BEGIN_PGP_SIGNED_MESSAGE):
                begin_sig_lines = ''
                for line in content.split('\n'):
                    if len(line.strip()) <= 0:
                        break
                    else:
                        begin_sig_lines += '{}\n'.format(line)
                content = content[len(begin_sig_lines):]


            # remove the signature itself
            start_index = content.find(oce_constants.BEGIN_PGP_SIGNATURE)
            end_index = content.find(oce_constants.END_PGP_SIGNATURE)
            content = content[0:start_index] + content[end_index + len(oce_constants.END_PGP_SIGNATURE):]

            # remove the extra characters added around the message itself
            content = content.replace('- {}'.format(oce_constants.BEGIN_PGP_MESSAGE), oce_constants.BEGIN_PGP_MESSAGE)
            content = content.replace('- {}'.format(oce_constants.END_PGP_MESSAGE), oce_constants.END_PGP_MESSAGE)

            return content

        try:
            message = self.get_message()
            if message.is_multipart():
                parts = message.get_payload()
                self.log_message('check each of {} parts of message for a signature'.format(
                    len(parts)))
                for part_index, part in enumerate(parts, start=1):
                    if isinstance(part, str):
                        content = part
                    else:
                        content = self._get_decoded_payload(part)
                    if self.contains_pgp_signature_delimeters(content):
                        charset, __ = get_charset(part)
                        self.log_message('set payload after removing sig with char set: {}'.format(charset))
                        part.set_payload(remove_signature(content), charset=charset)
                        self.log_message('extracted signature block from part {}'.format(part_index))

            else:
                content = self._get_decoded_payload(message)
                if isinstance(content, str) and self.contains_pgp_signature_delimeters(content):
                    # take the charset from the message itself; there is no
                    # part in this branch, and the old lookup raised a
                    # NameError that was silently swallowed below
                    charset, __ = get_charset(message)
                    message.set_payload(remove_signature(content), charset=charset)
                    self.log_message('extracted signature block from content with char set: {}'.format(charset))
        except Exception:
            self.log_message('EXCEPTION see syr.exception.log')
            record_exception()

    def write_to(self, output_file):
        '''
            Write message to the specified file.

            >>> from goodcrypto.mail.utils.dirs import get_test_directory
            >>> from goodcrypto_tests.mail.message_utils import get_encrypted_message_name
            >>> filename = get_encrypted_message_name('iso-8859-1-binary.txt')
            >>> with open(filename) as input_file:
            ...     output_dir = get_test_directory()
            ...     output_filename = os.path.join(output_dir, 'test-message.txt')
            ...     mime_message = EmailMessage(input_file)
            ...     with open(output_filename, 'w') as out:
            ...         mime_message.write_to(out)
            ...         os.path.exists(output_filename)
            ...         mime_message.write_to(out)
            ...     os.path.exists(output_filename)
            ...     os.remove(output_filename)
            True
            True
            True
            True

            if os.path.exists(output_filename):
                os.remove(output_filename)
        '''

        result_ok = False
        try:
            if isinstance(output_file, IOBase):
                if output_file.closed:
                    with open(output_file.name, 'w') as out:
                        out.write(self.to_string())
                        out.flush()
                else:
                    output_file.write(self.to_string())
                    output_file.flush()

            elif isinstance(output_file, StringIO):
                output_file.write(self.to_string())

            else:
                with open(output_file, 'w') as out:
                    out.write(self.to_string())
                    out.flush()

            result_ok = True
        except Exception:
            record_exception()
            raise Exception

        return result_ok


    def to_string(self, charset=None, mangle_from=False):
        '''
            Convert message to a string.

            The message is flattened with a Generator (header lines limited
            to 78 characters; mangle_from is passed on as mangle_from_). If
            that fails, the message's as_string() is tried, and as a last
            resort the header and content lines are joined by hand.

            charset is only used for logging; when None it is looked up with
            get_charset().

            Returns the string, or None when an IOError or MessageException
            occurred (it is kept in self.last_error).

            >>> from goodcrypto_tests.mail.message_utils import get_plain_message_name
            >>> filename = get_plain_message_name('basic.txt')
            >>> with open(filename) as input_file:
            ...     file_content = input_file.read().replace('\\r\\n', '\\n')
            ...     position = input_file.seek(os.SEEK_SET)
            ...     email_message = EmailMessage(input_file)
            ...     file_content.strip() == email_message.to_string().strip()
            True
        '''

        string = None

        try:
            msg = self._message
            if charset is None:
                charset, __ = get_charset(msg, self._last_charset)
                self.log_message('char set in to_string(): {}'.format(charset))

            #  convert the message
            try:
                file_pointer = StringIO()
                message_generator = Generator(file_pointer, mangle_from_=mangle_from, maxheaderlen=78)
                message_generator.flatten(msg)
                string = file_pointer.getvalue()
            except Exception as error:
                # bound to "error" so the AttributeError builtin is not rebound
                try:
                    self.log_message('unable to flatten message')
                    record_exception(error)

                    string = msg.as_string()
                except Exception:
                    #  we explicitly want to catch everything here, even NPE
                    self.log_message('unable to convert message as_string')

                    string = '{}\n\n{}'.format(
                        '\n'.join(self.get_header_lines()),
                        '\n'.join(self.get_content_lines()))

                    if self.DEBUGGING: self.log_message("message string:\n{}".format(string))

        except IOError as io_error:
            self.last_error = io_error
            self.log_message(io_error)

        except MessageException as msg_exception:
            self.last_error = msg_exception
            self.log_message(msg_exception)

        return string


    def get_header_lines(self):
        '''
            Get message headers as a list of lines.

            The lines follow RFC 2822, with a maximum of 998 characters per line.
            Longer headers are folded: the first line carries the first 998
            characters and every following line starts with a tab followed by
            up to 997 characters of the remainder.

            Returns:
                A list with one 'Name: value' entry per header, plus the
                folded continuation lines of over-long headers. A header
                whose value is None is emitted with an empty value.

            >>> from goodcrypto_tests.mail.message_utils import get_plain_message_name
            >>> filename = get_plain_message_name('basic.txt')
            >>> with open(filename) as input_file:
            ...     email_message = EmailMessage(input_file)
            ...     len(email_message.get_header_lines()) > 0
            True
        '''

        max_line_length = 998
        #  continuation lines give up one character to the leading tab
        continuation_length = max_line_length - 1

        lines = []
        for key in self._message.keys():
            value = self.get_header(key)
            if value is None:
                value = ''
            raw_line = '{}: {}'.format(key, value)

            #  first (or only) line of this header
            lines.append(raw_line[:max_line_length])

            #  the rest of the header, if any, goes into continuation lines;
            #  the previous code kept the head here instead of the tail, so
            #  the first line was duplicated and the remainder was dropped
            remainder = raw_line[max_line_length:]
            for start in range(0, len(remainder), continuation_length):
                lines.append('\t' + remainder[start:start + continuation_length])

        return lines


    def get_content_lines(self):
        '''
            Gets the message content as a list of lines.

            This is the part of the message after the header and the separating blank
            line, with no decoding.

            Returns:
                A list of strings. A string payload is split at newlines;
                for a list payload every part contributes its own lines
                (a Message part is rendered with as_string() first).
                An empty list is returned when there is no payload.

            >>> from goodcrypto_tests.mail.message_utils import get_plain_message_name
            >>> filename = get_plain_message_name('basic.txt')
            >>> with open(filename) as input_file:
            ...     email_message = EmailMessage(input_file)
            ...     len(email_message.get_content_lines()) > 0
            True
        '''

        lines = []
        payloads = self._message.get_payload()
        if payloads is None:
            self.log_message('No content')
        elif isinstance(payloads, str):
            lines = payloads.split('\n')
        else:
            for payload in payloads:
                if isinstance(payload, Message):
                    #  split the rendered part; extending the list with the
                    #  bare string would add one entry per character
                    lines.extend(payload.as_string().split('\n'))
                else:
                    lines.extend(payload.split('\n'))

        return lines

    def _parse_header_line(self, line, last_name):
        '''
            Parse a header line (internal use only).

            The line is split at its first colon into a name and a value.
            The pair is then set on a throwaway Message and round-tripped
            through self.parser; if the parser yields at least one header
            the pair is added to this message with add_header(), otherwise
            (or if anything raises) the raw line is appended to
            self.bad_header_lines.

            Args:
                line: One raw header line, or None.
                last_name: Name of the most recently parsed header, or None.

            Returns:
                A tuple (name, value, last_name). All three are None when
                line is None. last_name is replaced by name whenever the
                line has a non-empty name.

            >>> email_message = EmailMessage()
            >>> name, value, last_name = email_message._parse_header_line(
            ...   'Mime-Version: 1.0', 'Subject')
            >>> name == 'Mime-Version'
            True
            >>> value == '1.0'
            True
        '''

        if line is None:
            name = value = last_name = None
        else:
            # everything before the first colon is the name, the rest the value;
            # without a colon the whole line becomes the name and value is ''
            name, __, value = line.partition(':')
            if name is not None:
                name = name.strip()

            if name is None or len(name) <= 0:
                # nothing in front of the colon
                self.log_message("no header name in line: " + line)
                if last_name is not None:
                    # NOTE(review): the old value is read from last_name but the
                    # combined value is added under the empty name, not under
                    # last_name -- presumably a continuation of the previous
                    # header was intended; confirm before changing
                    old_value = self.get_header(last_name)
                    self.add_header(name, '{} {}\n'.format(old_value.strip('\n'), value.strip()))
            else:
                last_name = name
                # str.partition always returns strings, so the None branch is
                # only a safeguard
                if value is None:
                    value = ''
                else:
                    value = value.strip()

            try:
                # try adding the header line and see if python can parse it
                test_message = Message()
                test_message.__setitem__(name, value)
                # a Parser works on text, any other parser gets encoded bytes
                if isinstance(self.parser, Parser):
                    temp_header = self.parser.parsestr(test_message.as_string(unixfrom=False))
                else:
                    temp_header = self.parser.parsebytes(test_message.as_string(unixfrom=False).encode())
                # no headers in the parsed result means the line was rejected
                if temp_header.__len__() == 0:
                    self.log_message('bad header: {}'.format(line))
                    self.bad_header_lines.append(line)
                else:
                    # if the parser accept this header line, then keep it
                    self.add_header(name, value)
            except Exception:
                # any failure while testing the line marks it as bad
                record_exception()
                self.bad_header_lines.append(line)

        return name, value, last_name

    def _set_content_encoding(self, name, value):
        '''
            Set encoding in content (internal use only).

            >>> email_message = EmailMessage()
            >>> email_message._set_content_encoding(
            ...   mime_constants.CONTENT_TYPE_KEYWORD, 'charset=utf-8')
        '''

        if name is None or value is None:
            self.log_message('no name or value defined while trying to set content encoding')

        elif name == mime_constants.CONTENT_TYPE_KEYWORD:
            try:
                # try to set the charset
                index = value.find('charset=')
                if index >= 0:
                    charset = value[index + len('charset='):]
                    if charset.startswith('"') and charset.endswith('"'):
                        charset = charset[1:len(charset)-1]
                    self._message.set_charset(charset)
            except Exception:
                record_exception()
                self._message.set_charset(constants.DEFAULT_CHAR_SET)

        elif name == mime_constants.CONTENT_XFER_ENCODING_KEYWORD:
            encoding_value = self._message.get(
               mime_constants.CONTENT_XFER_ENCODING_KEYWORD)
            self.log_message('message encoding: {}'.format(encoding_value))
            if encoding_value is None or encoding_value.lower() != value.lower():
                self._message.__delitem__(name)
                self._message.__setitem__(name, value)
                self.log_message('set message encoding: {}'.format(value))

    def _get_decoded_payload(self, msg, decode=True):
        '''
            Get the payload and decode it if necessary.

            >>> email_message = EmailMessage()
            >>> email_message._get_decoded_payload(None)
        '''
        if msg is None:
            payload = None
        else:
            payload = msg.get_payload(decode=decode)

            if isinstance(payload, bytearray) or isinstance(payload, bytes):
                charset, __ = get_charset(msg, self._last_charset)
                self.log_message('decoding payload with char set: {}'.format(charset))
                try:
                    payload = payload.decode(encoding=charset)
                except:
                    payload = payload.decode(encoding=charset, errors='replace')


        return payload

    def _create_new_header(self, message_string):
        '''
            Create a new header from a corrupted message (internal use only).

            >>> from goodcrypto_tests.mail.message_utils import get_plain_message_name
            >>> with open(get_plain_message_name('basic.txt')) as input_file:
            ...    message_string = ''.join(input_file.readlines())
            ...    email_message = EmailMessage()
            ...    body_text_lines = email_message._create_new_header(message_string)
            ...    len(body_text_lines) > 0
            True
        '''

        last_name = None
        body_text_lines = None

        if message_string is None:
            self.log_message('no message string defined to create new header')
        else:
            self.log_message('starting to parse headers')
            lines = message_string.split('\n')
            header_count = 0
            for line in lines:

                if line is None or len(line.strip()) <= 0:
                    self.log_message('finished parsing headers')
                    if header_count + 1 <= len(lines):
                        body_text_lines = lines[header_count + 1:]
                    else:
                        body_text_lines = []
                    break

                else:
                    header_count += 1
                    name, value, last_name = self._parse_header_line(line, last_name)

                    if (name is not None and
                        (name == mime_constants.CONTENT_TYPE_KEYWORD or
                         name == mime_constants.CONTENT_XFER_ENCODING_KEYWORD) ):

                        self._set_content_encoding(name, value)

        return body_text_lines


    def _create_new_body_text(self, body):
        '''
            Create the body text from a corrupted message.

            The body becomes the payload of self._message, using the
            charset reported by get_charset(). If bad header lines were
            collected, they are listed at the end of the body.

            Args:
                body: Either the complete body as one string, or a list of
                    body lines without their line terminators (as returned
                    by _create_new_header()).

            >>> from goodcrypto_tests.mail.message_utils import get_plain_message_name
            >>> with open(get_plain_message_name('basic.txt')) as input_file:
            ...    email_message = EmailMessage(input_file.readlines())
            ...    email_message._create_new_body_text('Test new body text')
        '''

        charset, __ = get_charset(self._message, self._last_charset)
        self.log_message('creating new body text with char set: {}'.format(charset))

        if isinstance(body, str):
            body_text = body
        else:
            #  the lines were produced by split('\n'), so the newlines have
            #  to be put back; the previous code tried to add encoded bytes
            #  to a str (always a TypeError) and then glued the lines
            #  together without any separator
            body_text = '\n'.join(body)

        if self.bad_header_lines:
            body_text += '\n\n{}\n'.format(i18n('Removed bad header lines'))
            for bad_header_line in self.bad_header_lines:
                body_text += '  {}\n'.format(bad_header_line)

        self._message.set_payload(body_text, charset=charset)

    def _create_good_message_from_bad(self, source):
        '''
            Create a good message from a source that contains a corrupted message.

            >>> from goodcrypto_tests.mail.message_utils import get_plain_message_name
            >>> with open(get_plain_message_name('bad-basic.txt')) as input_file:
            ...    email_message = EmailMessage()
            ...    email_message._create_good_message_from_bad(input_file)
        '''

        try:
            # start with a fresh message
            self._message = Message()

            if isinstance(source, IOBase):
                source.seek(os.SEEK_SET)
                message_string = source.read()
            else:
                message_string = source

            body_text = self._create_new_header(message_string)
            if body_text:
                self._create_new_body_text(body_text)

        except Exception as message_exception:
            self.log_message(message_exception)
            record_exception()
            raise MessageException(message_exception)

    def init_new_message(self, from_addr, to_addr, subject, text=None):
        ''' Initialize a basic new message.

            Adds the From, To and Subject headers and, when text is
            given and not empty, sets it as the message text.

            Used primarily for testing.

            >>> # In honor of Kirk Wiebe, a whistleblower about Trailblazer, an NSA mass surveillance project.
            >>> from_user = '******'
            >>> to_user = '******'
            >>> email_message = EmailMessage()
            >>> email_message.init_new_message(from_user, to_user, "Test message", 'Test body text')
        '''

        basic_headers = (
            (mime_constants.FROM_KEYWORD, from_addr),
            (mime_constants.TO_KEYWORD, to_addr),
            (mime_constants.SUBJECT_KEYWORD, subject),
        )
        for keyword, content in basic_headers:
            self.add_header(keyword, content)

        if text:
            self.set_text(text)


    def log_message_exception(self, exception_error, message, log_msg):
        '''
            Log an exception.

            Args:
                exception_error: The exception (or text) handed on to
                    log_exception() as message_exception.
                message: Object with a to_string() method whose text is
                    logged as well, or None to log nothing more.
                log_msg: The text handed on to log_exception().

            A failure while rendering message is logged, not raised.

            >>> from syr.log import BASE_LOG_DIR
            >>> from syr.user import whoami
            >>> email_message = EmailMessage()
            >>> email_message.log_message_exception(Exception, 'message', 'log message')
            >>> os.path.exists(os.path.join(BASE_LOG_DIR, whoami(), 'goodcrypto.mail.message.email_message.log'))
            True
            >>> os.path.exists(os.path.join(BASE_LOG_DIR, whoami(), 'syr.exception.log'))
            True
        '''

        self.log_exception(log_msg, message_exception=exception_error)
        #  identity test: comparing with != could be hijacked by the
        #  message object's own comparison methods
        if message is not None:
            try:
                self.log_message("message:\n" + message.to_string())
            except Exception as log_error:
                self.log_message("unable to log message: {}".format(log_error))


    def log_exception(self, log_msg, message_exception=None):
        '''
            Log an exception.

            The current exception is recorded, then log_msg is logged and
            recorded. If message_exception is an exception instance or a
            string, its text is logged and recorded as well; any other
            kind of object is ignored.

            Args:
                log_msg: Text describing what went wrong.
                message_exception: Optional exception instance or string
                    with more detail.

            >>> from syr.log import BASE_LOG_DIR
            >>> from syr.user import whoami
            >>> email_message = EmailMessage()
            >>> email_message.log_exception('test')
            >>> os.path.exists(os.path.join(BASE_LOG_DIR, whoami(), 'goodcrypto.mail.message.email_message.log'))
            True
            >>> os.path.exists(os.path.join(BASE_LOG_DIR, whoami(), 'syr.exception.log'))
            True
            >>> email_message.log_exception('test', message_exception='message exception')
        '''

        record_exception()

        self.log_message(log_msg)
        record_exception(message=log_msg)

        if message_exception is None:
            return

        if isinstance(message_exception, Exception):
            #  isinstance also accepts subclasses, which the previous exact
            #  type comparison skipped; exceptions without a 'value'
            #  attribute fall back to their string form
            detail = getattr(message_exception, 'value', None)
            if detail is None:
                detail = str(message_exception)
        elif isinstance(message_exception, str):
            detail = message_exception
        else:
            return

        self.log_message(detail)
        record_exception(message=detail)

    def log_message(self, message):
        '''
            Log a message.

            The log file is created on first use and kept in self._log.

            >>> from syr.log import BASE_LOG_DIR
            >>> from syr.user import whoami
            >>> email_message = EmailMessage()
            >>> email_message.log_message('test')
            >>> os.path.exists(os.path.join(BASE_LOG_DIR, whoami(), 'goodcrypto.mail.message.email_message.log'))
            True
        '''

        log = self._log
        if log is None:
            log = self._log = LogFile()

        log.write_and_flush(message)
Example #18
0
# Script fragment: walks every message id found by an earlier search, and
# for each message visits all MIME parts to save images and inspect
# plain-text parts. The fragment is cut off after the last line shown.

# data comes from a search issued before this fragment; its first element
# holds the whitespace-separated message ids
email_id = data[0].split()

# stop right away when the search returned no ids
if not email_id:
    print_error("No mail found")
    exit(1)
else:
    print_info("You've got mail!")

# NOTE(review): parsebytes() is called on this object below, so Parser is
# presumably a bytes parser (e.g. an alias of BytesParser) -- confirm
parser = Parser()

# create image dir
utilities.mkchdir("out")

for d in email_id:
    # fetch the complete message and parse the raw bytes
    result, data = mail.fetch(d, "(RFC822)")
    parsedbytes = parser.parsebytes(data[0][1])

    # create a directory from the sender: the From header is passed through
    # replace() with patterns for '<', '>' and '"' mapped to ''
    # (presumably removing those characters -- helper not shown)
    from_ = parsedbytes.get("From")
    utilities.mkchdir(replace(from_, {"\\<": "", "\\>": "", '\\"': ""}))

    # save images and extract urls
    for part in parsedbytes.walk():
        # NOTE(review): get() returns None for a part without a
        # Content-Type header, which would make startswith() fail
        content_type = part.get("Content-Type")
        # save image
        if content_type.startswith("image"):
            decode_save_image(part)
        # save links
        elif content_type.startswith("text/plain"):
            # convert to regular text if encoded
            if part.get("Content-Transfer-Encoding") == "base64":
Example #19
0
    sys.exit()

# Getting all unseen mail
# Script fragment: collects every unseen message from the IMAP connection
# `mail`, rewrites its To/From headers and queues it for sending. The
# sending loop is cut off after the last line shown.

result, data = mail.search(None, 'unseen')
if len(data[0]) == 0:
    # nothing to forward: close both connections (`s` is presumably the
    # SMTP connection opened earlier -- not shown here)
    logger.info('No unseen mails!')
    logger.info(mail.logout())
    logger.info(s.quit())
else:
    ids = data[0].split()  # getting unseen letters id list
    msgs = []
    for x in ids:
        # fetch the complete message and parse the raw bytes
        result, data = mail.fetch(x, 'RFC822')
        parser = BytesParser()
        msg = parser.parsebytes(data[0][1])
        # changing "To" and "From" fields in header
        # (delete first: setting a header does not replace an existing one)
        msg.__delitem__('To')
        msg.__setitem__('To', smtp_send_to_header)
        msg.__delitem__('From')
        msg.__setitem__('From', smtp_login)
        msgs.append(msg)
    logger.info('Have %d new letters', len(ids))
    logger.info('Close imap protocol')
    logger.info(mail.logout())

    # sending messages

    i = 0
    for msg in msgs:
        logger.debug('Trying to send message')
def process(config_ini, limit=None):
    """
    Move the attachments of IMAP messages to a destination and keep
    stripped copies of the messages in the mailbox.

    For every matching message the text part gets a "saved" notice, the
    attachments (all payload parts after the first) are added to a new
    destination item and removed from the message, the original message
    is moved to the processed mailbox and the stripped message is
    appended to the source mailbox with the original flags.

    Args:
        config_ini: file name (or names) of the configuration, read with
            ConfigParser.read(). The 'source' and 'destination' sections
            are used.
        limit: optional maximum number of messages to process; None
            processes every message found.

    Raises:
        Exception: when the mailbox cannot be selected or the UID MOVE
            command does not answer 'OK'.
        NotImplementedError: when the first payload part has a content
            type other than multipart/related, multipart/alternative,
            text/plain or text/html.
    """
    # SafeConfigParser was a deprecated alias and is gone in Python 3.12;
    # ConfigParser is the same parser. Imported locally so this function
    # does not rely on the file's import block.
    from configparser import ConfigParser

    config = ConfigParser()
    config.read(config_ini)

    destination_name = config.get('destination', 'name')

    log.info('connecting to source server')
    # IMAP_CLS is indexed with the ssl flag to pick the connection class
    connection = IMAP_CLS[config.getboolean('source', 'ssl')](config.get(
        'source', 'server'), config.getint('source', 'port'))
    connection.login(config.get('source', 'login'),
                     config.get('source', 'password'))

    mailbox = config.get('source', 'mailbox')
    log.debug('selecting mailbox %s', mailbox)
    res = connection.select('"{}"'.format(mailbox))
    if res[0] != 'OK':
        raise Exception(res[1])

    log.debug('searching messages')
    # an explicit search expression wins over the minimum-size search
    if config.has_option('source', 'imap_search'):
        res = connection.uid('search', None,
                             config.get('source', 'imap_search'))
    else:
        res = connection.uid('search', None, 'LARGER',
                             str(config.getint('source', 'email_min_size')))

    message_uids = res[1][0].split()
    total_messages = total_process = len(message_uids)
    if limit is not None:
        total_process = min(total_messages, limit)

    parser = BytesParser()
    processed_mailbox = config.get('source', 'processed_mailbox')
    destination = DESTINATIONS[config.get('destination',
                                          'type')](config['destination'])

    log.info('found %s messages to process', total_messages)
    for idx, uid in enumerate(message_uids):
        if idx >= total_process:
            break

        log.info('processing %s/%s...', idx + 1, total_process)

        log.debug('downloading and parsing message...')
        # BODY.PEEK[] fetches the message without setting the \Seen flag
        res = connection.uid('fetch', uid, '(FLAGS BODY.PEEK[])')
        message = parser.parsebytes(res[1][0][1])
        flags = re.findall(r'FLAGS (\(.*?\))', res[1][0][0].decode('utf8'))[0]

        # the first payload part is treated as the message text, the
        # remaining parts as attachments
        payload = message.get_payload()
        textmsg = payload[0]

        bkp_identifier = str(uuid.uuid4())

        content_type = textmsg['Content-Type'].lower()
        if 'multipart/related' in content_type:
            log.warning('multipart/related not supported, skipping...')
            continue
        elif 'multipart/alternative' in content_type:
            text, html = textmsg.get_payload()
            add_saved_notice(text, destination_name, bkp_identifier)
            add_saved_notice(html, destination_name, bkp_identifier)
        elif 'text/plain' in content_type or 'text/html' in content_type:
            add_saved_notice(textmsg, destination_name, bkp_identifier)
        else:
            raise NotImplementedError

        log.debug('backup identifier: %s', bkp_identifier)

        destitem = destination.new(bkp_identifier,
                                   format_relevant_headers(message))

        # payload[1:] is a copy, so deleting from payload below is safe
        attachments = payload[1:]
        if not attachments:
            log.debug('no attachments to save')
            continue

        for attachment in attachments:
            destitem.add(attachment)
            # always index 1: the next attachment moves up after each delete
            del payload[1]

        log.debug('saving attachments to destination...')
        destitem.save()

        log.debug('moving processed message to mailbox %s', processed_mailbox)
        res = connection.xatom(
            'UID MOVE', '{} "{}"'.format(uid.decode('utf8'),
                                         processed_mailbox))
        if res[0] != 'OK':
            raise Exception(res[1])

        log.debug('storing stripped message')
        connection.append(mailbox, flags, datetime.now(timezone.utc),
                          message.as_bytes())

    destination.finalize()
    log.debug('logging out')
    connection.logout()

    log.info('done')
Example #21
0
# Script: log in to the IMAP server and print the text body of every
# message in the "Junk Email" mailbox.
M = imaplib.IMAP4('mail.livemail.co.uk', 143)

# The account file holds the user name on its first line and the password
# on its second line.
with open(".account.txt", "r") as f:
    lines = f.readlines()

# Remove the line terminators instead of cutting the user name at a fixed
# length; the password previously kept its trailing newline.
username = lines[0].rstrip('\r\n')
password = lines[1].rstrip('\r\n')

M.login(username, password)
M.select('"Junk Email"')
resp, data = M.uid('search', None, "ALL")  # search and return Uids

uids = data[0].split()
mailparser = BytesParser()
for uid in uids:
    #resp,data = M.uid('fetch',uid,'(RFC822)')
    resp, data = M.uid('fetch', uid, "(BODY[TEXT])")
    msg = mailparser.parsebytes(data[0][1])
    #print (msg)
    print(data[0][1])
'''
for uid in uids:
    #resp,data = M.uid('fetch',uid,'(RFC822)')
    resp,data = M.uid('fetch',uid,"(BODY[HEADER])")
    msg = mailparser.parsebytes(data[0][1])
    #msg = str(data[0][1])
    domain = msg['From'].split('@') 
    try:
        #handle.append(msg)
        handle.append(domain[1])
        #print (domain[0])
    except:
        print ('Error: '+domain[0])
Example #22
0
def main(data, context):
    """
    Tidy an IMAP account by sender, recipient, subject and age.

    Four passes are made over the account, each followed by an expunge
    (the last one also by close and logout):

    1. "Junk Email": senders from `goods` are copied back to Inbox;
       messages matching the subject, sender or recipient lists are
       flagged deleted; remaining messages are copied to Junk-Likely
       (sender in `tldsj2`) and/or Junk-HighSpam (X-Spam-Score above 4).
    2. Inbox, unseen messages: senders in `tlds` and recipients in
       `SpamTo` are flagged deleted, senders in `tldsj2` are copied to
       Junk-Likely; the \\Seen flag is removed again afterwards.
    3. Inbox, all messages: messages from `newss` senders older than
       one day (two days for dates parsed without a zone) are flagged
       deleted.
    4. "Freecycle": messages older than one day are flagged deleted.

    Args:
        data: not used in the body.
        context: not used in the body.
    """
    # recipients (To header) whose mail is treated as spam
    SpamTo = [
        '<*****@*****.**>', '<*****@*****.**>', '<*****@*****.**>',
        '<*****@*****.**>', '<*****@*****.**>', '<*****@*****.**>',
        '"*****@*****.**" <*****@*****.**>', '*****@*****.**',
        '<*****@*****.**>', '<*****@*****.**>', '<*****@*****.**>',
        '*****@*****.**'
    ]
    # subject substrings that get a junk message deleted
    subjectj = ['fungus', 'moncler']
    # sender substrings that get a junk message deleted
    SpamFrom = ['patelsheila', 'sheila patel']
    # sender substrings that get an unseen Inbox message deleted
    tlds = [
        '.icu>', '.br>', '.bid>', '.date>', '.loan>', '.trade>', '.ga>',
        '.cf>', '.ml>', '.ar>', '.gq>', '.tk>', '.space>', '.site>', '.top>',
        '.xyz>', 'patriotbundle.com', 'conservazon.com>', 'lifeproofshoes',
        'gotmail4u', '.cyou>', '.monster>', '.guru>'
    ]
    # sender substrings that get a junk message deleted
    tldsj = [
        '.icu>', '.br>', '.bid>', '.date>', '.loan>', '.trade>', '.ga>',
        '.cf>', '.ml>', '.ar>', '.gq>', '.tk>', '.info>', '.review>',
        '.online>', '.website>', '.space>', '.site>', '.top>', '.win>',
        '.life>', '.xyz>', 'patriotbundle.com', 'conservazon.com>',
        'lifeproofshoes', 'gotmail4u', '.cyou>', '.monster>', '.guru>'
    ]
    # sender substrings that move a message to Junk-Likely
    tldsj2 = [
        '.ltd>', 'americanas.com', '.rest', '.us>', '.pw>', '.press>',
        '.club>', '.live>', '.pro>', '.download>', '.xyz>', '.today>',
        '.casa>', '.world>', '.live>', '.id>', '.site>', '.cyou>', '.monster>',
        '.guru>'
    ]
    # sender substrings whose junk mail is copied back to the Inbox
    goods = [
        'coursera', 'creditkarma', 'holidayextras', 'carbmanager', 'ntask',
        'fool.co.uk', 'apple.com', 'evite.com', 'moneysavingexpert', 'edx.org',
        'mailtoself', 'sourceforge', 'channel4', 'stubhub', 'bankofamerica',
        'deloitte', 'trainline', 'cvsspecialty', 'halifax', 'priceline',
        'heliohost', 'ticketmaster', 'lastpass', 'vintage-inns', 'mandsbank',
        'spiritairlines', 'taxdisc', 'dds.ga', 'delta', 'upwork', 'telegraph',
        'tvguide.co.uk', 'heathrow'
    ]
    # sender substrings whose Inbox mail is deleted once it is old
    newss = [
        'nextdoor.com', 'nextdoor.co.uk', 'morningbrew.com', '@medium.com',
        'dowjones.com', 'todoist.com', 'linkedin.com', 'tvguide.co.uk',
        'realpython', 'cmwf.org', 'trello.com', 'thesundaytimes.co.uk',
        'thetimes.co.uk', 'nytimes.com', 'wsj.com', 'getpocket.com',
        'pinterest.com', 'thepointsguy.co.uk', 'freecycle.org'
    ]
    # "From Subject" texts of messages flagged deleted (only collected here)
    deleted = []
    now = datetime.datetime.now()
    # NOTE(review): the naive local time is labelled as UTC without being
    # converted -- confirm the host clock runs in UTC
    nowaware = now.replace(tzinfo=datetime.timezone.utc)
    delta = datetime.timedelta(1)
    delta2 = datetime.timedelta(2)
    M = imaplib.IMAP4('mail.livemail.co.uk', 143)
    # account file: user name on the first line, password on the second
    f = open(".account.txt", "r")
    lines = f.readlines()
    username = lines[0].rstrip('\n')
    password = lines[1].rstrip('\n')
    f.close()
    M.login(username, password)

    # ---- pass 1: the junk mailbox ----
    M.select('"Junk Email"')
    resp, data = M.uid('search', None, "ALL")  # search and return Uids
    uids = data[0].split()
    mailparser = BytesParser()
    for uid in uids:
        # delflag becomes 1 once the message has been flagged deleted
        delflag = 0
        resp, data = M.uid('fetch', uid, "(BODY[HEADER])")
        msg = mailparser.parsebytes(data[0][1])
        print(msg['To'], msg['From'], msg['Subject'])
        # known good senders: copy to Inbox, delete here only if the copy worked
        for good in goods:
            if good in str(msg['From']).lower():
                result = M.uid('COPY', uid, 'Inbox')
                if result[0] == 'OK':
                    M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                    delflag = 1
        #M.uid('STORE',uid, '-FLAGS', '(\\Seen)')
        # both words must appear, each in either Subject or From
        if ('ashley' in str(msg['Subject']).lower()
                or 'ashley' in str(msg['From']).lower()) and (
                    'madison' in str(msg['Subject']).lower()
                    or 'madison' in str(msg['From']).lower()):
            M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
            deleted.append(str(msg['From']) + ' ' + str(msg['Subject']))
            delflag = 1
        for subj in subjectj:
            if subj in str(msg['Subject']).lower():
                M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                deleted.append(str(msg['From']) + ' ' + str(msg['Subject']))
                delflag = 1
        for tld in tldsj:
            if tld in str(msg['From']).lower():
                M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                deleted.append(str(msg['From']) + ' ' + str(msg['Subject']))
                delflag = 1
        for spamf in SpamFrom:
            if spamf in str(msg['From']).lower():
                M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                deleted.append(str(msg['From']) + ' ' + str(msg['Subject']))
                delflag = 1
        for spam in SpamTo:
            if spam in str(msg['To']).lower():
                M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                deleted.append(
                    str(msg['To']) + ' ' + str(msg['From']) + ' ' +
                    str(msg['Subject']))
                delflag = 1
        #result1 = M.uid('FETCH',uid,'(FLAGS)') previously used to detect if //Deleted flag present but changed to using delflag instead
        # only messages not flagged above are sorted into the junk sub-folders
        if (delflag == 0):
            for tld in tldsj2:
                if tld in str(msg['From']).lower():
                    result = M.uid('COPY', uid, 'Junk-Likely')
                    if result[0] == 'OK':
                        M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                        deleted.append(
                            str(msg['From']) + ' ' + str(msg['Subject']))
            try:
                if float(msg['X-Spam-Score']) > 4:
                    result = M.uid('COPY', uid, 'Junk-HighSpam')
                    if result[0] == 'OK':
                        M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
            except:
                # a missing or non-numeric score ends up here; any other
                # error raised in the try block is swallowed as well
                print()

    M.expunge()

    # ---- pass 2: unseen messages in the Inbox ----
    M.select('Inbox')
    resp, data = M.uid('search', None, "UNSEEN")  # search and return Uids
    uids = data[0].split()
    mailparser = BytesParser()
    for uid in uids:
        delflag = 0
        resp, data = M.uid('fetch', uid, "(BODY[HEADER])")
        msg = mailparser.parsebytes(data[0][1])
        print(msg['To'], msg['From'], msg['Subject'])

        for tld in tlds:
            if tld in str(msg['From']).lower():
                M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                deleted.append(str(msg['From']) + ' ' + str(msg['Subject']))
                delflag = 1
        # here the whole To header must equal a list entry (pass 1 uses a
        # substring test instead)
        if str(msg['To']).lower() in SpamTo:
            M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
            deleted.append(
                str(msg['To']) + ' ' + str(msg['From']) + ' ' +
                str(msg['Subject']))
            delflag = 1
        #result1 = M.uid('FETCH',uid,'(FLAGS)') previously used to detect if //Deleted flag present but changed to using delflag instead
        if (delflag == 0):
            for tld in tldsj2:
                if tld in str(msg['From']).lower():
                    result = M.uid('COPY', uid, 'Junk-Likely')
                    if result[0] == 'OK':
                        M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                        deleted.append(
                            str(msg['From']) + ' ' + str(msg['Subject']))
        # remove the \Seen flag again (presumably set by the header fetch
        # above -- confirm against the server's behaviour)
        M.uid('STORE', uid, '-FLAGS', '(\\Seen)')

    M.expunge()

    # ---- pass 3: old newsletters in the Inbox ----
    M.select('"Inbox"')
    resp, data = M.uid('search', None,
                       "UNSEEN")  # search and return unread Uids
    unseenuids = data[0].split()
    resp, data = M.uid('search', None, "ALL")  # search and return all Uids
    uids = data[0].split()
    mailparser = BytesParser()
    for uid in uids:
        resp, data = M.uid('fetch', uid, "(BODY[HEADER])")
        msg = mailparser.parsebytes(data[0][1])
        for news in newss:
            if news in str(msg['From']).lower():
                print('yes' + ', ' + msg['Date'])
                print(msg['To'], msg['From'], msg['Subject'])
                # the Date header is parsed with three attempts: first 31
                # characters with zone, first 30 with zone, first 25 without
                try:
                    msgdate = datetime.datetime.strptime(
                        msg['Date'][:31], '%a, %d %b %Y %H:%M:%S %z')
                    if nowaware - msgdate > delta:
                        print(msg['Date'])
                        print(msg['Date'][:31])
                        M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                except:
                    print('Error: ' + msg['Date'] + ' ' + msg['From'])
                    print(msg['Date'][:31])
                    try:
                        msgdate = datetime.datetime.strptime(
                            msg['Date'][:30], '%a, %d %b %Y %H:%M:%S %z')
                        if nowaware - msgdate > delta:
                            print(msg['Date'])
                            print(msg['Date'][:30])
                            M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                    except:
                        print('Error: ' + msg['Date'] + ' ' + msg['From'])
                        print(msg['Date'][:30])
                        try:
                            # no zone in the date: compare naive times and
                            # use the two-day threshold
                            msgdate = datetime.datetime.strptime(
                                msg['Date'][:25], '%a, %d %b %Y %H:%M:%S')
                            if datetime.datetime.now() - msgdate > delta2:
                                print(msg['Date'])
                                print(msg['Date'][:25])
                                M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
                        except:
                            print('Error: ' + msg['Date'] + ' ' + msg['From'])
                            print(msg['Date'][:25])

            # NOTE(review): this check sits inside the `for news` loop, so it
            # is repeated once per entry of newss for every message
            if uid in unseenuids:
                M.uid('STORE', uid, '-FLAGS',
                      '(\\SEEN)')  #make unread messages unread again
    M.expunge()

    # ---- pass 4: the Freecycle mailbox ----
    M.select('"Freecycle"')
    resp, data = M.uid('search', None, "ALL")  # search and return Uids
    uids = data[0].split()
    mailparser = BytesParser()
    for uid in uids:
        resp, data = M.uid('fetch', uid, "(BODY[HEADER])")
        msg = mailparser.parsebytes(data[0][1])
        # unlike pass 3 there is no fallback: a Date header that does not
        # match the format raises here
        msgdate = datetime.datetime.strptime(msg['Date'][:31],
                                             '%a, %d %b %Y %H:%M:%S %z')
        if nowaware - msgdate > delta:
            print(msg['Date'])
            print(msg['Date'][:31])
            M.uid('STORE', uid, '+FLAGS', '(\\Deleted)')
    M.expunge()

    M.close()
    M.logout()
Example #23
0
def _decode_mail_bytes(data, charset):
    """Return *data* as ``str``, decoding ``bytes`` with *charset* if needed.

    UTF-8 is used when *charset* is missing or unknown to Python, and
    undecodable bytes are replaced rather than raising, so the result is
    always a ``str``.
    """
    if isinstance(data, str):
        return data
    try:
        return data.decode(charset or "utf-8", errors="replace")
    except LookupError:
        # The message declared a charset name Python has no codec for.
        return data.decode("utf-8", errors="replace")


def recv(popserver, popport, user, pwd):
    """Fetch the most recent message from a POP3 mailbox.

    Parameters:

        Essentially the same as for the ``send`` function.

    Returns:

        A 2-tuple ``(title, msg_content)``; both items are ``str``.
        ``msg_content`` is the payload of the last ``text/plain`` part found
        in the message, or ``""`` when there is none.

    Notes:

        1. If there is no network connection, a ``socket.gaierror``
           exception is raised.

    """
    # Log in to the POP server
    pop = poplib.POP3(popserver, popport)
    try:
        pop.set_debuglevel(0)
        pop.user(user)
        pop.pass_(pwd)

        # Fetch the most recent message
        # -- mailbox status: (number of messages, mailbox size)
        msg_count, _mailbox_size = pop.stat()
        # -- lines : list(bytes)
        _response, lines, _octets = pop.retr(msg_count)
    finally:
        # Always end the session so the connection is not leaked.
        try:
            pop.quit()
        except (poplib.error_proto, OSError):
            # QUIT itself failed (e.g. broken connection): just drop the socket.
            pop.close()
    msg = b"\r\n".join(lines)

    # Turn the raw message into a Message object
    msg_parser = BytesParser()
    # msg_obj : email.message.Message
    msg_obj = msg_parser.parsebytes(msg)

    # Extract the title.
    # decode_header -> [(chunk, charset), ...]; a header without encoded words
    # comes back as a single (str, None) pair, otherwise every chunk is bytes
    # and unencoded chunks have charset None.
    subject = msg_obj['Subject']
    debug_log(subject)
    # A message without a Subject header yields None here.
    chunks = decode_header(subject) if subject is not None else []
    debug_log(chunks)
    # Join *all* chunks so mixed subjects ("Re: =?utf-8?...?=") are not cut
    # off after the first chunk.
    title = "".join(
        _decode_mail_bytes(chunk, charset) for chunk, charset in chunks)

    # Extract the message content
    body = ""
    for part in msg_obj.walk():
        if part.get_content_type() != "text/plain":
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        body = _decode_mail_bytes(payload, part.get_content_charset())

    debug_log("emailhelper 接受消息成功。 title : {}".format(title))

    return title, body
Example #24
0
def process(options, mboxes: List[str]):
    """Connect to an IMAP server and flag duplicate messages in *mboxes*.

    The mailboxes are scanned in order. For every message a message id is
    obtained through ``get_message_id``; a message whose id was already seen
    (in this or an earlier mailbox) is reported as a duplicate and passed to
    ``mark_messages_deleted``, unless ``options.dry_run`` is set.

    Args:
        options: Object carrying the run settings as attributes. The
            attributes read here are ``process``, ``ssl``, ``server``,
            ``port``, ``starttls``, ``user``, ``password``, ``just_list``,
            ``recursive``, ``reverse``, ``dry_run``, ``sent_before``,
            ``verbose``, ``use_checksum``, ``use_id_in_checksum``,
            ``only_tag``, ``show`` and ``no_close``.
        mboxes: Names of the mailboxes to scan. The list is copied, so the
            caller's list is left untouched.

    The function calls ``sys.exit(1)`` when the connection fails, when
    ``options.starttls`` is set but the server does not offer STARTTLS, when
    the login fails, or when no mailbox is given. An ``ImapDedupException``
    raised while scanning is reported on stderr and not propagated.
    """
    # Work on a private copy: the recursive and reverse options below extend
    # and reorder the list, which should not leak into the caller's list.
    mboxes = list(mboxes)

    serverclass: Type[Any]
    if options.process:
        serverclass = imaplib.IMAP4_stream
    elif options.ssl:
        serverclass = imaplib.IMAP4_SSL
    else:
        serverclass = imaplib.IMAP4

    try:
        if options.process:
            server = serverclass(options.process)
        elif options.port:
            server = serverclass(options.server, options.port)
        else:
            # Use the default, which will be different depending on SSL choice
            server = serverclass(options.server)
    except socket.error as e:
        sys.stderr.write(
            "\nFailed to connect to server. Might be host, port or SSL settings?\n"
        )
        sys.stderr.write("%s\n\n" % e)
        sys.exit(1)

    #  server.debug = 4  # If you want to see what's going on

    if ("STARTTLS" in server.capabilities) and hasattr(server, "starttls"):
        server.starttls()
    elif options.starttls:
        sys.stderr.write("\nError: Server did not offer TLS\n")
        sys.exit(1)
    elif not options.ssl:
        sys.stderr.write("\nWarning: Unencrypted connection\n")

    try:
        if not options.process:
            server.login(options.user, options.password)
    except Exception as e:
        # Not a bare ``except``: Ctrl-C and SystemExit must pass through.
        # The reason is reported the same way as for a failed connection.
        sys.stderr.write("\nError: Login failed\n")
        sys.stderr.write("%s\n\n" % e)
        sys.exit(1)

    # List mailboxes option
    # Just do that and then exit
    if options.just_list:
        try:
            for mb in get_mailbox_list(server):
                print(mb)
        finally:
            # Do not leave the connection open on this early return.
            server.logout()
        return

    if not mboxes:
        sys.stderr.write("\nError: Must specify mailbox\n")
        sys.exit(1)

    # Recursive option
    # Add child mailboxes to mboxes
    if options.recursive:
        # Make sure mailbox name is surrounded by quotes if it contains a space
        parent = add_quotes(mboxes[0])
        # Fetch the hierarchy delimiter
        bits = parse_list_response(
            check_response(server.list(parent, '""'))[0])
        delimiter = bits[1].decode()
        pattern = '"' + delimiter + '*"'
        mboxes.extend(get_mailbox_list(server, parent, pattern))
        print(
            "Working recursively from mailbox %s. There are %d total mailboxes."
            % (parent, len(mboxes)))

    if options.reverse:
        mboxes.reverse()

    if len(mboxes) > 1:
        print("Working with mailboxes in order: %s" % (", ".join(mboxes)))

    # Headers are read in batches of this many messages.
    read_batch = 100
    # Deleting messages one at a time can be slow if there are many,
    # so we batch them up.
    delete_batch = 30

    # OK - let's get started.
    # Iterate through a set of named mailboxes and delete the later messages discovered.
    try:
        parser = BytesParser()  # can be the same for all mailboxes
        # Create a list of previously seen message IDs, in any mailbox
        msg_ids: Dict[str, str] = {}
        # What happens to a duplicate; identical for every message, so it is
        # worked out once instead of per message.
        if options.only_tag:
            action = "tagged as '%s'" % TAG_NAME
        else:
            action = "marked as deleted"
        for mbox in mboxes:
            msgs_to_delete = []  # should be reset for each mbox
            msg_map = {}  # should be reset for each mbox

            # Make sure mailbox name is surrounded by quotes if it contains a space
            mbox = add_quotes(mbox)

            # Select the mailbox (read-only when this is a dry run)
            msgs = check_response(
                server.select(mailbox=mbox, readonly=options.dry_run))[0]
            print("There are %d messages in %s." % (int(msgs), mbox))

            # Check how many messages are already marked 'deleted'...
            numdeleted = len(get_deleted_msgnums(server, options.sent_before))
            print("%s message(s) currently marked as deleted in %s" %
                  (numdeleted or "No", mbox))

            # Now get a list of the ones that aren't deleted.
            # That's what we'll actually use.
            msgnums = get_undeleted_msgnums(server, options.sent_before)
            print("%s others in %s" % (len(msgnums), mbox))

            if options.verbose:
                print("Reading the others... (in batches of %d)" % read_batch)

            for i in range(0, len(msgnums), read_batch):
                if options.verbose:
                    print("Batch starting at item %d" % i)

                # and parse them.
                for mnum, hinfo in get_msg_headers(server,
                                                   msgnums[i:i + read_batch]):
                    # Parse the header info into a Message object
                    mp = parser.parsebytes(hinfo)

                    if options.verbose:
                        print("Checking %s message %s" % (mbox, mnum))
                        # Store message only when verbose is enabled (to print it later on)
                        msg_map[mnum] = mp

                    # Record the message-ID header (or generate one from other headers)
                    msg_id = get_message_id(mp, options.use_checksum,
                                            options.use_id_in_checksum)

                    if msg_id:
                        # If we've seen this message before, record it as one to be
                        # deleted in this mailbox.
                        if msg_id in msg_ids:
                            print(
                                "Message %s_%s is a duplicate of %s and %s be %s"
                                % (
                                    mbox,
                                    mnum,
                                    msg_ids[msg_id],
                                    "would" if options.dry_run else "will",
                                    action,
                                ))
                            if options.show or options.verbose:
                                print("Subject: %s\nFrom: %s\nDate: %s\n" %
                                      (mp["Subject"], mp["From"], mp["Date"]))
                            msgs_to_delete.append(mnum)
                        # Otherwise just record the fact that we've seen it
                        else:
                            msg_ids[msg_id] = f"{mbox}_{mnum}"

                print("%s message(s) in %s processed" %
                      (min(len(msgnums), i + read_batch), mbox))

            # OK - we've been through this mailbox, and msgs_to_delete holds
            # a list of the duplicates we've found.

            if len(msgs_to_delete) == 0:
                print("No duplicates were found in %s" % mbox)

            else:
                if options.verbose:
                    print("These are the duplicate messages: ")
                    for mnum in msgs_to_delete:
                        print_message_info(msg_map[mnum])

                if options.dry_run:
                    print("If you had NOT selected the 'dry-run' option,\n"
                          "  %i messages would now be %s." % (
                              len(msgs_to_delete),
                              action,
                          ))

                else:
                    if options.only_tag:
                        print("Tagging %i messages as '%s'..." %
                              (len(msgs_to_delete), TAG_NAME))
                    else:
                        print("Marking %i messages as deleted..." %
                              (len(msgs_to_delete)))
                    if options.verbose:
                        print("(in batches of %d)" % delete_batch)
                    for i in range(0, len(msgs_to_delete), delete_batch):
                        mark_messages_deleted(server,
                                              msgs_to_delete[i:i + delete_batch],
                                              options.only_tag)
                        if options.verbose:
                            print("Batch starting at item %d marked." % i)
                    print("Confirming new numbers...")
                    numdeleted = len(
                        get_deleted_msgnums(server, options.sent_before))
                    numundel = len(
                        get_undeleted_msgnums(server, options.sent_before))
                    print(
                        "There are now %s messages marked as deleted and %s others in %s."
                        % (numdeleted, numundel, mbox))
                    if options.only_tag:
                        numtagged = len(
                            get_tagged_msgnums(server, options.sent_before))
                        print(
                            "There are now %s messages tagged as '%s' in %s." %
                            (numtagged, TAG_NAME, mbox))

        if not options.no_close:
            server.close()

    except ImapDedupException as e:
        print("Error:", e, file=sys.stderr)
    finally:
        server.logout()
Example #25
0
def get_email_object(message_data: str) -> Message:
    """Build a ``Message`` from URL-safe base64 encoded message data.

    *message_data* is decoded with ``base64.urlsafe_b64decode`` and the
    resulting bytes are parsed by a default ``BytesParser``.
    """
    raw_bytes = base64.urlsafe_b64decode(message_data)
    return BytesParser().parsebytes(raw_bytes)