Example #1
0
    def Receive(self, index):
        """Fetch and print up to six messages, counting down from ``index``.

        Opens a POP3-over-SSL session using ``self.emailInfo`` (keys
        ``pop3_server``, ``email`` and ``pwd``), retrieves the messages
        numbered ``index`` down to ``max(index - 6, 0) + 1``, parses each one
        and passes it to ``self.print_info``.  The session is stored on
        ``self.server`` and is always ended with QUIT, even when retrieval or
        parsing raises.

        :param index: message number to start from (counting downwards).
        """
        self.server = poplib.POP3_SSL(self.emailInfo["pop3_server"])
        try:
            # Authenticate.
            self.server.user(self.emailInfo["email"])
            self.server.pass_(self.emailInfo["pwd"])

            # At most six messages, and never below message number 1.
            stop = max(index - 6, 0)
            for i in range(index, stop, -1):
                resp, lines, octets = self.server.retr(i)

                # ``lines`` holds every raw line of the message; joining them
                # rebuilds the full original text.
                msg_content = b'\r\n'.join(lines)

                # Parse the message and display it.
                msg = BytesParser().parsebytes(msg_content)
                self.print_info(msg)

                # A message could be deleted from the server by its number:
                # self.server.dele(i)
        finally:
            # Close the connection.
            self.server.quit()
Example #2
0
async def populate_with_test_data(storage):
    """Store the sample incoming and outgoing messages in ``storage``.

    Every message is stored under the account looked up for the test
    address; the account is printed once before anything is stored.
    """

    def _parse(raw_message):
        # Parse raw bytes with the modern (default) email policy.
        return BytesParser(policy=policy.default).parsebytes(raw_message)

    account = account_manager.get_account_for_address("*****@*****.**")
    print(account)

    # Incoming mail: the sender comes from the fixture, the recipient is fixed.
    for raw_message, sender in incoming_messages:
        parsed = _parse(raw_message)
        to_list = [parse_email("Test <*****@*****.**>")]
        await storage.store_mail(account, parsed, sender, to_list, incoming=True)

    # Outgoing mail: the sender is fixed, the recipients come from the fixture.
    for raw_message, to_list in outgoing_messages:
        sender = "*****@*****.**"
        parsed = _parse(raw_message)
        await storage.store_mail(account, parsed, sender, to_list, incoming=False)
Example #3
0
    def receive(self):
        """Fetch every message in INBOX, print a summary of each, return them.

        For each message the decoded Subject and From headers are printed,
        followed by the body as returned by ``get_body`` decoded as GB2312.

        :return: list of the parsed messages, in the order the search
            result listed their ids.
        """
        self.conn.select('INBOX')  # select the inbox (the default mailbox)
        result, dataid = self.conn.search(None, 'ALL')

        # The first element holds all message ids; split it on whitespace
        # to get one id per list entry.
        mailidlist = dataid[0].split()

        maillist = []
        for mail_id in mailidlist:
            # Fetch the complete message by its id.
            result, data = self.conn.fetch(mail_id, '(RFC822)')
            e = email.message_from_bytes(data[0][1])
            maillist.append(e)
            # A missing header is treated as an empty string so that
            # decode_header does not fail on None.
            subject = email.header.make_header(
                email.header.decode_header(e['SUBJECT'] or ''))
            mail_from = email.header.make_header(
                email.header.decode_header(e['From'] or ''))
            print("邮件的subject是:%s" % subject)
            print("邮件的发件人是:%s" % mail_from)
            # NOTE(review): the body is always decoded as GB2312; bytes that
            # are not valid GB2312 will raise here. utf-8 / GB18030 may be
            # needed for other mail - confirm with real data.
            body = str(get_body(e), encoding='gb2312')
            print("邮件内容是:%s" % body)

        return maillist
Example #4
0
    def runReceive(self):
        """Download every message over POP3 and add the new ones to the list.

        Connects through ``ReceiveMail``, walks the mailbox from the highest
        message number down to 1, and for each entry of the parsed mail info
        that is not yet in ``self.isReceived`` parses the content and adds
        the info to the 'emaillist' widget.  Errors while handling a single
        message are printed and do not stop the loop.
        """
        receiver = ReceiveMail()
        self.popServer = receiver.connect()
        self.emailNum = receiver.GetEmailNum()

        # Parse the messages one by one.
        for number in range(self.emailNum, 0, -1):
            resp, raw_lines, octets = self.popServer.retr(number)
            message = BytesParser().parsebytes(b'\r\n'.join(raw_lines))
            try:
                # Basic information of the current message.
                info = receiver.parseEmailInfo(message)
                for entry in info:
                    # Skip entries that were already added to the list.
                    if entry in self.isReceived:
                        continue
                    # Parse the message content, then show it.
                    receiver.parseEmailContent(message)
                    self.addQList(info, 'emaillist')
            except Exception as err:
                print(str(err))

        receiver.quit()
        self.loading.hide()
Example #5
0
def message_from_bytes(s, *args, **kws):
    """Build a message object tree from the bytes object ``s``.

    Any extra positional and keyword arguments are forwarded unchanged to
    the ``BytesParser`` constructor.
    """
    from email.parser import BytesParser
    parser = BytesParser(*args, **kws)
    return parser.parsebytes(s)
 def get_meta_data():
     """Return the ``(to, from, subject)`` headers of the file 'bytes_message'.

     The file is read in binary mode from the current working directory and
     parsed with the default email policy.
     """
     with open('bytes_message', 'rb') as source:
         message = BytesParser(policy=default).parse(source)
     return message['to'], message['from'], message['subject']
Example #7
0
    def _login_btn_clicked(self):
        """Log in to Gmail over POP3 and show the last listed message.

        Reads the credentials from ``self.input_User`` / ``self.input_Pass``,
        retrieves the message with the highest number, parses it and passes
        it to ``self.new_window``.  When the mailbox is empty a notice is
        printed and no window is opened.

        The retrieval result is also published through the module-level
        globals ``response``, ``headerLines`` and ``bytes``.
        """
        # These globals are kept so that any code reading them keeps working.
        # NOTE(review): ``bytes`` shadows the builtin at module level.
        global response, headerLines, bytes

        username = self.input_User.get()
        password = self.input_Pass.get()

        # Only the user name is echoed; the password must never be printed.
        print(username)

        # Connect to the Gmail POP3 service.
        M = poplib.POP3_SSL('pop.gmail.com')
        try:
            M.user(username)
            M.pass_(password)
            # Number of messages in the mailbox.
            numero = len(M.list()[1])
            if numero == 0:
                print("No messages in mailbox")
                return
            # Only the last message is displayed, so only that one is read.
            response, headerLines, bytes = M.retr(numero)
        finally:
            M.quit()

        # Join all lines into one bytes object and parse it.
        mensaje = b'\n'.join(headerLines)
        p = BytesParser()
        email = p.parsebytes(mensaje)
        # Open a new window showing the message.
        self.new_window(email)
Example #8
0
def query_S3(bucket, objkey):
    """Return the plain-text body of the email stored under ``objkey``.

    The bucket is listed until an object whose key equals ``objkey`` is
    found; its content is parsed as an email with the SMTP policy.

    :param bucket: name of the S3 bucket.
    :param objkey: key of the object holding the raw email.
    :return: text of the plain-text part, or ``''`` when the object is not
        found or the message has no readable plain-text part.
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket)

    body = None
    for obj in bucket.objects.all():
        if obj.key == objkey:
            body = obj.get()['Body'].read()
            break
    if body is None:
        print('Object not found in bucket - returning empty text.')
        return ''

    msg = BytesParser(policy=policy.SMTP).parsebytes(body)

    # Get the plain text version of the email.  get_body() returns None when
    # there is no such part, which surfaces here as an AttributeError.
    try:
        part = msg.get_body(preferencelist=('plain',))
        plain = part.get_content()
    except Exception:
        # Best effort: any failure to read the part yields an empty text.
        print(
            'Incoming message does not have an plain text part - skipping this part.'
        )
        plain = ''

    return plain
Example #9
0
def from_FILE_to_raw_email(file_name):
    """Parse the e-mail stored in ``file_name`` into the global ``raw_email``.

    A progress line is printed and the function pauses for one second
    before the file is read in binary mode.
    """
    global raw_email
    notice = "".join(("Coping from ", file_name, " raw e-mail content."))
    print(notice)
    time.sleep(1)

    parser = BytesParser(policy=default)
    with open(file_name, 'rb') as handle:
        raw_email = parser.parse(handle)
Example #10
0
def make_person_schema(mailFile, outputDir, person_db):
    """Record the people found in one mail and their links in ``person_db``.

    ``mailFile`` is parsed as an email.  The sender and the mailbox owner
    (taken from Delivered-To) are stored, extra owner addresses from
    X-Original-To and Resent-From are attached, and the owner is linked to
    everybody listed in To and Cc.  ``outputDir`` is not used here.
    """
    message = BytesParser().parse(mailFile)

    # The sender of the mail goes into the database first.
    sender_name, sender_addr = get_info_from_mail_field(message['from'])
    sender = Person(sender_name, sender_addr)
    update_db(person_db, sender)

    # The mailbox owner is whoever the mail was delivered to.
    owner_name, owner_addr = get_info_from_mail_field(message['Delivered-To'])
    owner = Person(owner_name, owner_addr)

    # These headers may carry further addresses belonging to the owner.
    for header_name in ('X-Original-To', 'Resent-From'):
        _, extra_addr = get_info_from_mail_field(message[header_name])
        if extra_addr:
            owner.addEmail(extra_addr)

    update_db(person_db, owner)

    # Link the owner with the To and Cc recipients.
    for header_name in ('to', 'cc'):
        link_people(person_db, owner, message.get_all(header_name, []))
Example #11
0
def process_email(raw_email):
    """Store an inbound reply as a ``Message`` on the thread named by its token.

    One of the ``To`` addresses must have the form ``local+<token>@domain``
    where ``<token>`` is 80 alphanumeric characters: characters 0-31 are the
    thread token, 32-63 the sender token and 64-79 a key that must equal the
    first 16 characters of ``hexdigest_sha256(SECRET_KEY, thread.token,
    sender.token)``.

    :param raw_email: the raw message as bytes.
    :raises NoTokenFoundException: no ``To`` address carries a token.
    :raises InvalidTokenException: the thread or sender token is unknown.
    :raises InvalidKeyException: the key part does not match.
    """
    import hmac

    msg = BytesParser(policy=policy.default).parsebytes(raw_email)
    body = msg.get_body(preferencelist=['plain'])
    content = body.get_payload(decode=True)

    charset = body.get_content_charset()
    if not charset:
        # chardet reports None when it cannot guess; fall back to UTF-8
        # instead of failing with a TypeError in decode().
        charset = chardet.detect(content)['encoding'] or 'utf-8'
    content = content.decode(charset)

    # Raw string: "\+" is an invalid escape in a normal string literal.
    regex = re.compile(r'^[^+@]+\+(?P<token>[a-zA-Z0-9]{80})@[^@]+$')

    m = None
    for addr in msg.get('To', '').split(','):
        m = regex.match(addr.strip())
        if m:
            break

    if not m:
        raise NoTokenFoundException

    token = m.group('token')
    key = token[64:]
    try:
        thread = MessageThread.objects.get(token=token[:32])
        sender = MessageCorrespondent.objects.get(token=token[32:64])
    except models.DoesNotExist:
        raise InvalidTokenException

    expected = hexdigest_sha256(settings.SECRET_KEY, thread.token,
                                sender.token)[:16]
    # The key comes from an untrusted mail: compare in constant time.
    if not hmac.compare_digest(key, expected):
        raise InvalidKeyException

    Message.objects.create(thread=thread,
                           from_email=sender.email,
                           content=content)
Example #12
0
def process_maildir(config):
    """Process every message in ``~/Maildir`` and refile it.

    Each message is handed to ``process_msg``.  Unless ``args.dry_run`` is
    set, the message is then added to ``archive`` (when ``process_msg``
    returned a true value) or to ``error`` (otherwise) and removed from the
    inbox.  Messages that cannot be parsed are skipped and left in place.

    :param config: passed through to ``process_msg``.
    """
    inbox = mailbox.Maildir("~/Maildir")
    for key in inbox.iterkeys():
        try:
            message = inbox[key]
        except email.errors.MessageParseError:
            continue
        msg = BytesParser(policy=policy.default).parsebytes(message.as_bytes())
        replied = process_msg(config=config, msg=msg)

        if args.dry_run:
            continue

        # Refile to archive or error, then delete the incoming message.
        if replied:
            print("OK", message['subject'], message['from'])
            target = archive
        else:
            print("NOT OK", message['subject'])
            target = error

        # try/finally makes sure a lock is released even if add/discard fails.
        target.lock()
        try:
            target.add(message)
        finally:
            target.unlock()

        inbox.lock()
        try:
            inbox.discard(key)
        finally:
            inbox.unlock()
def decode_email(msg_str, pos, key_map):
    """Parse a raw message into a record of headers, body and attachments.

    The record starts as the result of ``parse_parts(message, key_map)`` and
    gains a 'Size' entry (length of ``msg_str``), an 'Attachments' list when
    any part has a filename, and either a 'text/plain' entry or - only when
    no plain text was found - a 'text/html' entry.  ``pos`` is not used.
    """
    message = BytesParser().parsebytes(msg_str)
    record = parse_parts(message, key_map)  # header fields named in key_map
    record['Size'] = len(msg_str)

    text_plain = ''
    text_html = ''
    attachment_names = []
    for section in message.walk():
        text_plain += decode_part(section, 'text/plain')
        if text_plain:
            # Plain text wins: forget any HTML gathered so far.
            text_html = ""
        else:
            text_html += decode_part(section, 'text/html')

        name = section.get_filename()
        if name:
            attachment_names.append(name)

    if attachment_names:
        record['Attachments'] = attachment_names

    if text_plain:
        record['text/plain'] = text_plain
    elif text_html:
        record['text/html'] = text_html
    return record
Example #14
0
    def fetch_and_parse(uids):
        '''Fetch the messages with the given UIDs and parse them into dicts.

        Every successfully fetched message yields a dict with the keys
        'Date' (a datetime), 'From', 'To', 'Delivered-To', 'Message-ID',
        'Subject', 'plain' and 'html'; the last two are None when the
        message has no part of that type.  UIDs whose fetch does not
        answer 'OK' are skipped.

        :param uids: iterable of message UIDs.
        :return: list of dicts, one per fetched message.
        '''
        from email.utils import parsedate_to_datetime

        result = []

        for uid in uids:
            reply, email_data = imap_server.uid('fetch', uid, '(RFC822)')
            if reply != 'OK':
                continue

            message = BytesParser(policy=default).parsebytes(email_data[0][1])
            email_dict = {}

            date_header = message['Date']
            try:
                email_dict['Date'] = datetime.strptime(
                    date_header, '%a, %d %b %Y %H:%M:%S %z')
            except ValueError:
                # Valid dates that do not fit the fixed format (no weekday,
                # trailing "(UTC)" comment, ...) are parsed leniently.
                email_dict['Date'] = parsedate_to_datetime(date_header)

            for header in ('From', 'To', 'Delivered-To', 'Message-ID',
                           'Subject'):
                email_dict[header] = message[header]

            email_dict['plain'] = None
            email_dict['html'] = None
            for part in message.walk():
                content_type = part.get_content_type()
                if content_type == 'text/html':
                    email_dict['html'] = part.get_body().get_content()
                elif content_type == 'text/plain':
                    email_dict['plain'] = part.get_body().get_content()
            result.append(email_dict)

        return result
Example #15
0
File: etm-n.py Project: 0x024/etm
def get_content(num):
    """Fetch message ``num`` and pass each decoded part to a transfer handler.

    Every non-multipart part is decoded as GBK.  Depending on what
    ``time_formate`` returns for the message date, the text is handed to
    ``get_transfer_v1`` (for '1') or ``get_transfer_v2`` (for '2'); any
    other value is ignored.  A TypeError prints 'empty-email' and a
    UnicodeDecodeError prints 'hahah'; both stop processing this message.

    :param num: message number passed to ``raw_conn.fetch``.
    """
    print(num)
    _status, data = raw_conn.fetch(num, '(RFC822)')
    # NOTE(review): the date is looked up through the global ``count``,
    # not through ``num`` - confirm this is intended.
    email_date = get_date(email_list[int(count)])
    try:
        msg = BytesParser().parsebytes(data[0][1])
        for part in msg.walk():
            if part.is_multipart():
                continue
            content = part.get_payload(decode=True).decode('GBK')
            temp = time_formate(email_date)
            print(temp)
            if temp == '1':
                print(temp)
                get_transfer_v1(content)
            elif temp == '2':
                print(temp)
                get_transfer_v2(content)
    except TypeError:
        print('empty-email')
    except UnicodeDecodeError:
        print('hahah')
Example #16
0
def _get_email_content(uid, data):
    """Split a raw email into text, HTML and attachments.

    :param uid: identifier passed on to ``_read_attachment`` / ``_read_html``.
    :param data: the raw message as bytes.
    :return: dict with the keys 'text', 'html' and 'attachments'.  When the
        message has HTML but no plain text, 'text' is filled with the
        content of the file that ``content['html']`` names.
    """
    content = dict(text=None, html=None, attachments=[])
    message = BytesParser(policy=policy.default).parsebytes(data)

    for part in message.walk():
        if part.is_multipart():
            continue

        if part.is_attachment():
            content['attachments'].append(_read_attachment(part, uid))
        elif part.get_content_type() == 'text/plain':
            content['text'] = _read_text(part)
        elif part.get_content_type() == 'text/html':
            content['html'] = _read_html(part, uid)

    if content['html'] and not content['text']:
        # ``with`` closes the file even when reading fails.
        with open(content['html'], 'r') as tmp:
            content['text'] = tmp.read()

    return content
Example #17
0
def emails_between(persons):
    """Print every mail exchanged between the flagged addresses.

    ``persons`` maps a directory name (below ``C:/PythonProjects/``) to a
    list of addresses.  Every file below each directory is parsed as an
    email; when its sender and one of its To/Cc/Bcc recipients are both
    flagged addresses, a line with date, sender, recipient and subject is
    printed.

    :param persons: dict of directory name -> iterable of addresses.
    """
    # All flagged addresses of all persons, flattened into one list.
    red_flag = [address for addresses in persons.values()
                for address in addresses]
    date_pattern = re.compile(r"[\d]{1,2} [ADFJMNOS]\w* [\d]{4}")

    for key in list(persons):
        directory = os.path.join("C:/PythonProjects/", key)
        for subdir, dirs, files in os.walk(directory):
            for file in files:
                with open(os.path.join(subdir, file), 'rb') as fp:
                    headers = BytesParser(policy=default).parse(fp)

                sender = '{}'.format(headers['from'])
                if sender not in red_flag:
                    continue

                email_dated = '{}'.format(headers['date'])
                # Fall back to the raw header text when no "day month year"
                # date is found, instead of failing with an IndexError.
                found = date_pattern.search(email_dated)
                date = found.group(0) if found else email_dated
                subject_line = '{}'.format(headers['subject'])

                recipients = []
                for field in ('to', 'cc', 'bcc'):
                    recipients.extend('{}'.format(headers[field]).split(','))

                for el in recipients:
                    if el.strip() in red_flag:
                        print('[', date, ']', sender, ' -> ', el,
                              '\n\tSubject:', subject_line)
def processEmail(emailBytes):
    """Extract the subject and a cleaned single-line text from a raw email.

    The bytes are decoded as-is (no MIME parsing).  When the first line
    contains 'Subject:', everything after its first eight characters is the
    subject.  Links are removed, runs of spaces/tabs and literal ``\\n``
    sequences collapse to one space, and whitespace before punctuation is
    dropped.

    :param emailBytes: the raw message as bytes.
    :return: tuple ``(subject, text)``.
    """
    text = emailBytes.decode()

    lines = text.split('\n')
    subject = lines[0][8:] if 'Subject:' in lines[0] else ''

    # NOTE(review): the first line is kept in the text when a subject was
    # found and dropped when none was found - this looks inverted; confirm
    # the intent before changing it.
    kept_lines = lines if subject != '' else lines[1:]
    text = ' '.join(kept_lines)

    text = re.sub(r'https?://\S+', '', text,
                  flags=re.MULTILINE)  # remove links
    text = re.sub(r' +|\t+|\\n', ' ', text)  # remove unnecessary spaces
    text = re.sub(r'\s([,?.!"](?:\s|$))', r'\1',
                  text)  # remove spaces before punctuation

    return subject, text
Example #19
0
 def resolve_domain(self, request_string):
     """Look up the address for the Host header of a raw HTTP request.

     The request line is cut off, the remaining bytes are parsed as
     headers, and the host name (without port) is logged and queried via
     ``dsnquery(..., 'A')``.  Returns the address of the first answer, or
     None when anything goes wrong (the error is logged).
     """
     try:
         _request_line, header_block = request_string.split(b'\r\n', 1)
         parsed = BytesParser().parsebytes(header_block)
         hostname = parsed["host"].split(":")[0]
         query_event = {
             'server': 'http_proxy_server',
             'action': 'query',
             'ip': self.transport.getPeer().host,
             'port': self.transport.getPeer().port,
             'payload': hostname
         }
         _q_s.logs.info(["servers", query_event])
         return dsnquery(hostname, 'A')[0].address
     except Exception as err:
         failure_event = {
             'server': 'http_proxy_server',
             'error': 'resolve_domain',
             "type": "error -> " + repr(err)
         }
         _q_s.logs.error(["errors", failure_event])
         return None
Example #20
0
    def _parse_data(data: bytes) -> dict:
        """Turn a ``from|to|<raw HTTP request>`` record into a dict.

        The result always has 'from' and 'to'.  The request URL is stored
        under 'image' when it ends with a known image extension, otherwise
        under 'url'.  'cookie' is added when the request has a Cookie
        header, and 'post' when the request has a non-blank body.

        :param data: record with two ``|`` separators followed by the raw
            HTTP request (request line, headers, optional body).
        :return: dict of the extracted fields.
        """
        parts = data.split(b'|', 2)

        data_dict = {
            'from': parts[0].decode('utf-8'),
            'to': parts[1].decode('utf-8')
        }

        request = parts[2]
        request_line, headers = request.split(b'\r\n', 1)
        payload = BytesParser().parsebytes(headers)

        host = payload['host']
        path_part = request_line.split(b' ')[1].decode('utf-8')
        url = 'http://{}{}'.format(host, path_part)

        if url.lower().endswith(
            ('.png', '.ico', '.jpeg', '.jpg', '.gif', '.svg')):
            data_dict['image'] = url
        else:
            data_dict['url'] = url

        if 'cookie' in payload:
            data_dict['cookie'] = payload['cookie']

        # Everything after the FIRST blank line is the body; splitting only
        # once keeps bodies that themselves contain blank lines.
        _head, separator, body = request.partition(b'\r\n\r\n')
        if separator and body.strip():
            data_dict['post'] = body.decode('utf-8')

        return data_dict
Example #21
0
def _read_multipart_field(fp, boundary, parser=BytesParser(_class=HTTPMessage)):
    """
    Read one part of a multipart/form-data message from ``fp``.

    ``fp`` must be positioned at the start of the part's header block. The
    headers are read up to the first blank line, then the data is copied in
    chunks until the next ``boundary`` delimiter.

    Returns a ``(headers, data)`` tuple: ``headers`` is whatever ``parser``
    produces for the header block and ``data`` is an ``ExpandableOutput``
    holding the part's content.

    Raises ``RequestParseError`` when the input ends before the boundary
    is seen. Nested multipart sections are not handled.
    """
    field_data = ExpandableOutput()
    headers = parser.parse(DelimitedInput(fp, b'\r\n\r\n'), headersonly=True)
    body = DelimitedInput(fp, b'\r\n--' + boundary)

    # XXX: handle base64 encoding etc
    while True:
        chunk = body.read(CHUNK_SIZE)
        if chunk == b'':
            break
        field_data.write(chunk)
    field_data.flush()

    # Input exhausted before a complete field was read?
    if not body.delimiter_found:
        raise RequestParseError("Incomplete data (expected boundary)")

    return headers, field_data
def parse_body(body):
    """Parse a raw email and return its main fields.

    The header names and the From, Date, To and Subject headers are printed
    as a side effect.

    :param body: the raw message as bytes.
    :return: dict with the keys 'from', 'to', 'subject', 'date' and 'text'.
        'text' is the content of the plain-text part, or ``''`` when the
        message has no readable plain-text part.
    """
    msg = BytesParser(policy=policy.SMTP).parsebytes(body)
    print("This is the message: ", msg.keys())
    print("From : ", msg['From'])
    print("Date: ", msg['Date'])
    print("To: ", msg['To'])
    print("Subject : ", msg['Subject'])

    # get_body() returns None when there is no plain part, which surfaces
    # as an AttributeError below; the text must then be '' (not None).
    try:
        part = msg.get_body(preferencelist=('plain',))
        plain = part.get_content()
    except Exception:
        # Best effort: any failure to read the part yields an empty text.
        print('Incoming message does not have an plain text part - skipping this part.')
        plain = ''

    return {
        'from': msg['From'],
        'to': msg['To'],
        'subject': msg['Subject'],
        'date': msg['Date'],
        'text': plain
        }
Example #23
0
    def fillUp(self):
        """Save the body of the last 100 mailbox messages under ``Data/``.

        For each message one file ``Data/<uid>.html`` is written.  The body
        is taken from the innermost first multipart branch: its last part,
        unless that part has a filename, in which case its first part.  A
        non-multipart message uses its own payload.  The bytes are decoded
        as UTF-8, falling back to windows-1251.  When a message cannot be
        processed a notice with its subject is written instead.
        """

        def save(uid, text):
            # NOTE(review): if ``uid`` is a bytes object, str() gives the
            # "b'123'" form in the file name; kept so existing files match.
            with open("Data/" + str(uid) + ".html", "w", encoding="utf8") as out:
                out.write(text)

        self.connect()

        # Search and return UIDs instead of sequence numbers.
        result, data = self.mail.uid('search', None, "ALL")
        id_list = data[0].split()

        for latest_email_uid in id_list[-100:]:
            result, data = self.mail.uid('fetch', latest_email_uid, '(RFC822)')
            # The raw text of the whole email, including headers and
            # alternate payloads.
            raw_email = data[0][1]
            email_message = BytesParser().parsebytes(raw_email)

            try:
                parts = email_message.get_payload(decode=False)
                # Descend through nested multiparts, always into the first child.
                while (isinstance(parts, list)
                       and isinstance(parts[0].get_payload(decode=False), list)):
                    parts = parts[0].get_payload(decode=False)

                if isinstance(parts, list):
                    # Prefer the last part unless it carries a filename.
                    if parts[-1].get_filename() is None:
                        chosen = parts[-1]
                    else:
                        chosen = parts[0]
                    body_bytes = chosen.get_payload(decode=True)
                else:
                    body_bytes = email_message.get_payload(decode=True)

                try:
                    text = body_bytes.decode()
                except UnicodeDecodeError:
                    text = body_bytes.decode('windows-1251')
                save(latest_email_uid, text)
            except Exception:
                # Best effort per message: record the failure and go on.
                # A missing Subject must not break the fallback itself.
                subject = email_message['Subject'] or ''
                save(latest_email_uid,
                     "This email could not be processed see what happened \n\nSubject: "
                     + str(subject))
    def parse(self, response):
        """
        Yield one Commission ``Meeting`` built from an email response.

        The response body is parsed as an email and its text is searched
        for a start time. Nothing is yielded when no start time is found.
        """
        email_message = BytesParser(policy=default).parsebytes(response.body)
        body_text = self._parse_email_text(email_message)
        start_time = self._parse_start(body_text)
        if start_time:
            item = Meeting(
                title="Commission",
                description="",
                classification=COMMISSION,
                start=start_time,
                end=None,
                all_day=False,
                time_notes="Confirm details with agency",
                location=self.location,
                links=[],
                source=response.url,
            )

            # Status and id are derived from the item built above.
            item["status"] = self._get_status(item, text=body_text)
            item["id"] = self._get_id(item)

            yield item
Example #25
0
    def _encode_parts(self, header_data, msg_data, encoder):
        """Re-encode every MIME part whose payload is not pure ASCII.

        ``self.headers`` and ``self.message`` are reset, the concatenated
        header and message data is parsed, and for each non-multipart part
        whose payload cannot be encoded as ASCII the existing
        Content-Transfer-Encoding header is removed and ``encoder`` is
        applied.  The resulting message is passed to ``self.parse_msg``.

        :type header_data: :py:obj:`bytes`
        :type msg_data: :py:obj:`bytes`
        """
        self.headers = None
        self.message = None

        raw = header_data + msg_data
        msg = BytesParser().parsebytes(raw) if six.PY3 else Parser().parsestr(raw)

        for part in msg.walk():
            if part.is_multipart():
                continue
            payload = part.get_payload()
            try:
                payload.encode('ascii')
            except UnicodeError:
                # Drop the old header so the encoder can set a new one.
                del part['Content-Transfer-Encoding']
                encoder(part)

        self.parse_msg(msg)
Example #26
0
 def get_mail_content(self, file_name):
     """Return the content of the plain-text body of the mail in ``file_name``.

     The file is read in binary mode and parsed with the default policy.

     :param file_name: path of the message file.
     :raises AttributeError: when the message has no plain-text body
         (``get_body`` then returns None).
     """
     # The ``with`` block closes the file; no explicit close() is needed.
     with open(file_name, 'rb') as fp:
         msg = BytesParser(policy=policy.default).parse(fp)
     return msg.get_body(preferencelist=('plain',)).get_content()
Example #27
0
def message_from_binary_file(fp, *args, **kws):
    """Build a message object tree from the binary file object ``fp``.

    Any extra positional and keyword arguments are forwarded unchanged to
    the ``BytesParser`` constructor.
    """
    from email.parser import BytesParser
    parser = BytesParser(*args, **kws)
    return parser.parse(fp)
Example #28
0
def parseweekmail(el, pl, st):
    '''
    Collect the senders of recent mails sent to the weekly-report group.

    Messages are read from number ``el`` downwards and the scan stops at
    the first message dated before ``st``.  A sender is collected when the
    mail is addressed to ``WEEKLY_GROUP`` (To or Cc) and its subject is NOT
    a project weekly report, i.e. it neither starts with '项目周报' nor has
    '项目周报' right before the first '('.  Mails whose Date header is
    missing or cannot be parsed are skipped.

    :param el: number of messages in the mailbox
    :param pl: poplib server object
    :param st: start date of the period to scan
    :return: list of sender addresses
    '''
    sender_list = []
    for index in range(el, 0, -1):
        lines = pl.retr(index)[1]
        msg = BytesParser(EmailMessage).parsebytes(b'\r\n'.join(lines))

        # Is the mail from this period, and is it addressed to the group?
        try:
            mail_date = parsedate_to_datetime(msg.get('Date', "")).date()
        except (TypeError, ValueError):
            # No usable date: the mail cannot be placed in time, skip it.
            continue
        mail_receiver = parseaddr(msg.get('To', ""))[1]
        mail_cc = parseaddr(msg.get('Cc', ""))[1]
        if mail_date < st:
            break
        mail_subject = decode_str(msg.get('Subject', ""))

        addressed_to_group = (mail_receiver == WEEKLY_GROUP
                              or WEEKLY_GROUP in mail_cc)
        if not addressed_to_group:
            continue

        is_project_report = (
            mail_subject.startswith('项目周报')
            or decode_str(mail_subject).split('(')[0].endswith('项目周报'))
        if not is_project_report:
            sender_list.append(parseaddr(msg.get('From', ""))[1])
    return sender_list
Example #29
0
def get_email(num, conn):
    """Fetch message ``num`` and return its sender, subject, body and files.

    :param num: message number passed to ``conn.fetch``.
    :param conn: connection object providing ``fetch``.
    :return: dict with 'From', 'Subject', 'File' (a list of ``UploadedFile``
        objects, one per part that has a filename) and - when the message
        has a text/plain part - 'Body'.  With several text/plain parts the
        last one wins.
    """
    result = {}
    _typ, content = conn.fetch(num, '(RFC822)')
    msg = BytesParser().parsebytes(content[0][1])

    result["From"] = decode_str(msg.get("From"), "From")
    result["Subject"] = decode_str(msg.get('Subject'), "Subject")
    result["File"] = []

    for part in msg.walk():
        if part.get_content_type() == "text/plain":
            body = part.get_payload(decode=True)
            charset = part.get_charsets()[0]
            if charset:
                result["Body"] = body.decode(charset)
            else:
                # No charset declared: decode(None) would raise a TypeError,
                # so fall back to UTF-8 and replace undecodable bytes.
                result["Body"] = body.decode("utf-8", errors="replace")

        file_name = part.get_filename()
        if file_name is not None:
            decoded_name = decode_str(file_name, "File")
            attachment = part.get_payload(decode=True)
            content_type = part.get_content_type()
            new_file = ContentFile(attachment)
            file_obj = UploadedFile(new_file, decoded_name, content_type,
                                    new_file.size, None, None)
            result["File"].append(file_obj)

    return result
Example #30
0
 def __init__(self, mail_data, mysql_creds, threshold, sensitivity, account,
              logger, mail_id, spam_folder):
     """Parse ``mail_data`` and prepare the state used for spam scoring.

     Opens a MySQL connection from ``mysql_creds`` (keys 'mysql_username',
     'mysql_password', 'mysql_database', 'mysql_host'), parses the mail,
     extracts its message text, runs the whitelist and blacklist checks and
     collects all URLs found in the message text into ``self.urls``.

     :param mail_data: the raw mail as bytes.
     :param mysql_creds: dict with the MySQL connection settings.
     :param threshold: stored as ``self.threshold``.
     :param sensitivity: stored as ``self.sensitivity``.
     :param account: stored as ``self.account``.
     :param logger: stored as ``self.log``.
     :param mail_id: stored as ``self.mail_id``.
     :param spam_folder: stored as ``self.spam_folder``.
     """
     # NOTE(review): these two patterns carry "/.../s" delimiters; Python's
     # ``re`` treats the slashes and the trailing "s" as literal characters.
     # Confirm how they are used before relying on them.
     self.JS_IMPORT_REGEX = r'/<script.*(?:src="(.*)").*>/s'
     self.JS_EXTRACT_REGEX = r'/<script.*>(.*?)<\/script>/s'
     # Backslashes before the parentheses are doubled so the literal has no
     # invalid escape sequences; the resulting string is unchanged.
     self.URL_REGEX = "http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|[^\x00-\x7F]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
     self.parser = BytesParser()
     self.sensitivity = sensitivity
     self.threshold = threshold
     self.log = logger
     self.spam_folder = spam_folder
     self.mysql_db = mysql.connector.connect(
         user=mysql_creds["mysql_username"],
         password=mysql_creds["mysql_password"],
         database=mysql_creds["mysql_database"],
         host=mysql_creds["mysql_host"])
     self.account = account
     self.spam_points = 0
     self.js_code = {}
     self.urls_in_document = []
     self.documents = {}
     self.mail_id = mail_id
     # The headers are defined as <key>:<to_remove_from key>
     # -1 is used to define the last header, after that comes the mail contents
     self.whitelisted = False
     self.blacklisted = False
     self.parsed_mail = self.parser.parsebytes(mail_data)
     self.header_data = dict(self.parsed_mail)
     self.message = ""
     self.extract_message()
     self._spam = -1
     self.check_whitelist()
     self.check_blacklisted()
     # Every URL found in the message text, without surrounding whitespace.
     self.urls = [
         url.strip() for url in re.findall(self.URL_REGEX, self.message)
     ]