Python BytesParser.get_bodyの例、email.parser.BytesParser.get_body Pythonの例

コード例 #1

0

ファイルを表示

ファイル: mail.py プロジェクト: UKTradeInvestment/barbara

    def __init__(self, data):
        """
        Parse a raw e-mail message and populate this object from it.

        Cribbed heavily from
        https://www.ianlewis.org/en/parsing-email-attachments-python

        :param data: the complete message as bytes; it is stored on
            ``self.raw``, hashed with SHA-512 and parsed with ``BytesParser``.
        """

        Loggable.__init__(self)

        self.raw = data
        self.attachments = []
        self.recipients = []

        parser = BytesParser(policy=policy.default)
        message = parser.parsebytes(self.raw)

        self.hash = hashlib.sha512(data).hexdigest()

        _, sender_address = parseaddr(str(message["From"]))
        self.sender = sender_address.lower()
        self.subject = str(message["Subject"]).replace("\r\n", "")

        # Prefer plain text. Note that it is unclear what happens here for
        # an HTML-only email.
        preferred_part = message.get_body(
            preferencelist=('plain', 'related', 'html'))
        rendered = str(preferred_part)

        # Strip everything south of the signature delimiter ("-- " on a
        # line of its own, preceded by a blank line).
        without_signature = re.sub(
            r"\r?\n\r?\n-- \r?\n.*", "", rendered, flags=re.DOTALL)

        # The first blank-line-separated chunk is discarded (presumably the
        # part's own header block); the rest is stitched back together.
        chunks = without_signature.split("\n\n")
        self.body = "\n\n".join(chunks[1:])

        self._set_recipients(message)
        self._set_time(message)
        self._set_attachments(message)

        self.logger.info('Consuming email: "{}"'.format(self.subject))

コード例 #2

0

ファイルを表示

ファイル: mail.py プロジェクト: XanderDwyl/paperless

    def __init__(self, data, verbosity=1):
        """
        Build the message object from raw e-mail bytes.

        Cribbed heavily from
        https://www.ianlewis.org/en/parsing-email-attachments-python

        :param data: raw message bytes handed to ``BytesParser``.
        :param verbosity: stored unchanged on ``self.verbosity``.
        :raises InvalidMessageError: when the message has no part, or more
            than one part, whose Content-Disposition is ``attachment``.
        """

        self.verbosity = verbosity

        self.subject = None
        self.time = None
        self.attachment = None

        parser = BytesParser(policy=policy.default)
        message = parser.parsebytes(data)
        self.subject = str(message["Subject"]).replace("\r\n", "")
        self.body = str(message.get_body())

        self.check_subject()
        self.check_body()

        self._set_time(message)

        Log.info(
            'Importing email: "{}"'.format(self.subject), Log.COMPONENT_MAIL)

        found = []
        for part in message.walk():
            disposition_header = part.get("Content-Disposition")
            if disposition_header:
                # Only the token before the first ";" names the disposition.
                kind = disposition_header.strip().split(";")[0].lower()
                if kind == "attachment":
                    # The payload is decoded as base64 unconditionally.
                    payload = part.get_payload()
                    found.append(Attachment(
                        b64decode(payload),
                        content_type=part.get_content_type()))

        if not found:
            raise InvalidMessageError(
                "There don't appear to be any attachments to this message")

        if len(found) > 1:
            raise InvalidMessageError(
                "There's more than one attachment to this message. It cannot "
                "be indexed automatically."
            )

        self.attachment = found[0]

コード例 #3

0

ファイルを表示

def extract_contents(message_object, email_name):
    """Parse raw message bytes and return selected fields as a JSON string.

    Args:
        message_object: the raw e-mail as bytes.
        email_name: identifier stored under the ``'id'`` key.

    Returns:
        A JSON document with the keys ``id``, ``from``, ``to``, ``cc``,
        ``subject``, ``date`` and ``body``.  ``body`` is the content of the
        text/plain part, or an empty string when the message has none.
    """
    message_byte_object = BytesParser(policy=policy.default) \
        .parsebytes(message_object)

    # preferencelist has to be a sequence of subtype names; a bare string
    # is only matched by substring, so pass a real one-element tuple.
    plain_part = message_byte_object.get_body(preferencelist=('plain',))

    email_object = {
        'id': email_name,
        'from': get_address_from_email(message_byte_object, 'From'),
        'to': get_address_from_email(message_byte_object, 'To'),
        'cc': get_address_from_email(message_byte_object, 'CC'),
        'subject': str(message_byte_object['subject']),
        'date': get_date_field_from_email(message_byte_object),
        # get_body() returns None for a message without a text/plain part
        # (e.g. HTML-only mail); fall back to an empty body instead of
        # failing with AttributeError.
        'body': plain_part.get_content() if plain_part is not None else ''
    }

    print('email object {}'.format(email_object))

    return json.dumps(email_object)

コード例 #4

0

ファイルを表示

ファイル: emailread.py プロジェクト: kryztof/pit

  def process_mailbox(self):
    """Download the messages that are newer than the last one in the history.

    All UIDs in the mailbox are searched, history entries for vanished
    messages are removed, and every message whose UID is greater than the
    last recorded one is fetched, turned into a history element and marked
    as read.  Progress is reported from 50 upwards in equal steps that add
    up to 30.
    """
    rv, data = self.imap.uid('search', None, "ALL")
    if rv != 'OK':
      dbgprint("No messages found!")
      return

    # The search response carries all UIDs as one whitespace-separated value.
    uids = data[0].split()

    #delete removed messages
    self.remove_deleted_msgs_from_history(uids)
    self.post_progress(50)

    #get last history uid
    lastmessage = 0
    if self.histcontainer.get_nr_elements('email') > 0:
      lastmessage = self.histcontainer.get_last_element('email').get_uid()

    dbgprint("last element uid: " + str(lastmessage))

    # An empty mailbox yields no UIDs; indexing uids[-1] would raise
    # IndexError, so there is simply nothing to download.
    if not uids:
      return

    #loop over all messages and download new ones
    pending = int(uids[-1]) - lastmessage
    if pending <= 0:
      return

    progressstep = 30 / pending
    progressactual = 50
    for uid in uids:
      if int(uid) <= lastmessage:
        continue
      rv, data = self.imap.uid('fetch', uid, '(RFC822)')
      if rv != 'OK':
        # uid comes back from imaplib as bytes; concatenating it to a str
        # directly raises TypeError, so render it through int() first.
        dbgprint("ERROR getting message " + str(int(uid)))
        continue

      raw = data[0][1]
      # The message is parsed twice: the legacy object feeds the history
      # helpers, the policy.default one provides get_body().
      msg = email.message_from_bytes(raw)
      msg2 = BytesParser(policy=policy.default).parsebytes(raw)
      body = msg2.get_body(preferencelist=('plain', 'html'))

      el = self.histcontainer.make_element_from_message(int(uid), msg)
      self.process_body(str(body), el)
      self.process_attachments(msg, el)
      self.histcontainer.add_element(el)

      self.mark_msg_as_read(int(uid))
      progressactual += progressstep
      self.post_progress(progressactual)

コード例 #5

0

ファイルを表示

def read_mail_ru(login, pw, del_mail=0):
    """Fetch every message from a mail.ru POP3 mailbox and save it to disk.

    For each message a ``.txt`` summary (From/Subject/Date headers followed
    by the preferred text body) and the untouched ``.bin`` original are
    written through ``write_file``.

    Args:
        login: mailbox login; also the prefix of the output file names.
        pw: mailbox password (surrounding whitespace is stripped).
        del_mail: when non-zero, each message is marked for deletion on the
            server after it has been saved.

    Returns:
        -1 when connecting, authenticating or listing fails; None otherwise.
    """
    today = get_today()
    server = "pop.mail.ru"  # "pop.att.yahoo.com"
    try:
        # 995 is the default POP3-over-SSL port, so it could be omitted.
        box = poplib.POP3_SSL(server, 995)
        # Never echo the password to the console.
        print('ok pop3 login=%s' % login)
        box.user(login)
        box.pass_(pw.strip())
        print('ok login')
        response, lst, octets = box.list()
    except (poplib.error_proto, OSError):
        # Protocol-level refusals and network/TLS failures only; programming
        # errors are no longer swallowed by a bare except.
        print('err pop3')
        return -1

    s = today + ' ' + login + ' messages: ' + n2s(
        len(lst)) + ' ' + b2s(response) + '\n'
    write_file('mail_log', s, 2)
    print(s)
    for msgnum, msgsize in [i.split() for i in lst]:
        n = int(msgnum)
        print(n, int(msgsize))
        (resp, lines, octets) = box.retr(n)
        bb = b'\n'.join(lines) + b'\n'
        msg = BytesParser(policy=policy.default).parsebytes(bb)

        # Missing headers come back as None; substitute an empty string so
        # the summary can still be assembled.
        parts = [
            'from: ' + msg.get('from', '') + '\n',
            'subject: ' + msg.get('subject', '') + '\n',
            'date: ' + msg.get('date', '') + '\n',
            '--------------------------\n',
        ]
        body = msg.get_body(preferencelist=('plain', 'html'))
        if body is not None:
            # get_body() returns None when there is no plain/html part.
            parts.append(body.get_content())
        parts.append('\n==================================================\n')
        ss = ''.join(parts)

        sm = get_mail(msg['from'])
        f = login + '\\' + str(n) + '_' + today + '(' + sm + ')'
        write_file(f + '.txt', ss, 2)  # decoded, simple form
        # TODO: strip html tags (value input? script?)
        write_file(f + '.bin', bb, 2)  # plus the message saved as-is
        if del_mail != 0:
            box.dele(n)  # delete the message from the server if requested
    box.quit()

コード例 #6

0

ファイルを表示

    def _find_verify_url(self, send_time: datetime = None):
        """
            find verify url.

            :return: None
        """

        for _id in self._mail_ids():
            _, data = self.mail.fetch(str(_id), "(RFC822)")
            try:
                msg = email.message_from_string(data[0][1])
            except TypeError:
                msg = email.message_from_bytes(data[0][1])

            if msg["from"].find(self.from_email) != -1:
                is_check = True
                date_tuple = email.utils.parsedate_tz(msg["date"])
                msg_date = datetime.datetime.fromtimestamp(
                    email.utils.mktime_tz(date_tuple))

                print("_find_verify_url")
                print(msg_date)

                if send_time:
                    is_check = False
                    if send_time <= msg_date:
                        is_check = True
                if is_check:
                    body_msg = BytesParser(policy=policy.default).parsebytes(
                        data[0][1])
                    body = body_msg.get_body(preferencelist=("plain", "html"))
                    verify_url = self._find_link_by_pattern(
                        body=body.get_content().splitlines(),
                        pattern=self.pattern)
                    if verify_url:
                        return verify_url

コード例 #7

0

ファイルを表示

    def handle_DATA(self, server, session, envelope):
        """Turn an incoming message into a ticket comment or a new ticket.

        The text/plain body is reduced to the reply text.  If
        ``self.get_ticket`` finds a ticket for the message, a comment is
        added to it; otherwise a new ticket is created in the inbox that
        matches one of the envelope recipients.

        Returns the SMTP status string: ``'250 OK'`` on success (or when
        the message has no usable plain-text body), or a ``'450 ...'``
        message when the target inbox has the feature disabled.
        """
        sender_address = envelope.mail_from

        parser = BytesParser(policy=policy.default)
        message = parser.parsebytes(envelope.content)
        body = message.get_body(preferencelist=('plain', ))
        if not body:
            return '250 OK'

        reply = EmailReplyParser.parse_reply(body.get_content())
        author, _ = User.objects.get_or_create(email=sender_address)
        ticket, message_id = self.get_ticket(message)

        if not ticket:
            # No existing ticket: open a new one in the addressed inbox.
            inbox = Inbox.objects.get(email__in=envelope.rcpt_tos, )
            if not inbox.enable_create_new_ticket_by_email:
                return '450 Creation of ticket by email is disabled for the inbox'

            Ticket.objects.create(author=author,
                                  inbox=inbox,
                                  title=message["Subject"],
                                  content=reply)
            UserInbox.objects.get_or_create(user=author, inbox=inbox)
            return '250 OK'

        if not ticket.inbox.enable_reply_by_email:
            return '450 Reply by email is disabled for the inbox'

        Comment.objects.create(
            ticket=ticket,
            author=author,
            is_reply=ticket.reply_message_id == message_id,
            content=reply)
        UserInbox.objects.get_or_create(user=author, inbox=ticket.inbox)
        return '250 OK'

コード例 #8

0

ファイルを表示

ファイル: final_email_parser.py プロジェクト: qbrc-cnap/biz_tier

    tds = footer.find_all('td')
    if len(tds) == 1:
        td = tds[0]
        questions = [x.text.strip() for x in td.select('.question')]
        answers = [x.text.strip() for x in td.select('.answer')]
        return dict(zip(questions, answers))
    else:
        return {}


if __name__ == '__main__':

    # Parse the sample e-mail and run every extractor over its HTML body.
    with open('scratch/example_email.txt', 'rb') as fp:
        whole_email = BytesParser(policy=default).parse(fp)

    body = whole_email.get_body()

    # Without an HTML body there is nothing to feed BeautifulSoup; stop
    # with a clear message instead of hitting a NameError on html_str.
    if body is None or body.get_content_subtype() != 'html':
        raise SystemExit('No HTML body found in scratch/example_email.txt')

    html_str = body.get_content()

    soup = BeautifulSoup(html_str, 'html.parser')

    order_id = get_order_id(soup)

    purchase = get_purchase_details(soup)

    client_email = get_client_email(soup)

    additional_q_and_a = get_additional_q_and_a(soup)

    # print outs for testing purposes:

コード例 #9

0

ファイルを表示

    def getData(self):
        """Parse the e-mail stored at ``self.uri`` into an AadhaarAuthenticationMail.

        The message body is parsed as HTML and two extraction strategies
        are tried in turn; the first record whose ``isClean()`` is true is
        returned.  When neither is clean the method falls through and
        returns ``None`` implicitly.
        """

        date_pattern = re.compile(r'[0-9]{2}/[0-9]{2}/[0-9]{4}')
        time_pattern = re.compile(r'[0-9]{2}:[0-9]{2}:[0-9]{2}')

        def unquoted(text):
            # Strip whitespace and drop one pair of enclosing double quotes.
            text = text.strip()
            if text.startswith('"') and text.endswith('"'):
                text = text[1:-1].strip()
            return text

        def text_after(marker, nodes):
            # Cleaned text of the node that directly follows the first node
            # matching *marker*; None when there is no such follower.
            for position, node in enumerate(nodes[:-1]):
                if re.search(marker, str(node)):
                    return unquoted(nodes[position + 1].text)
            return None

        def first_match(pattern, nodes):
            # First substring matching *pattern* in any node, or None.
            for node in nodes:
                hit = re.search(pattern, str(node))
                if hit:
                    return hit.group(0)
            return None

        def response_code(nodes):
            # Text of the first node mentioning 'Response code', minus its
            # first 14 characters (presumably the label and a separator).
            for node in nodes:
                if re.search('Response code', str(node)):
                    return node.text.strip()[14:].strip()
            return None

        def auth_outcome(nodes):
            # 'Success' / 'Failure' according to the first node mentioning
            # either word; 'success' wins when a node contains both.
            for node in nodes:
                rendered = str(node)
                if re.search('success', rendered):
                    return 'Success'
                if re.search('fail', rendered):
                    return 'Failure'
            return None

        def from_demo_siblings(soup):
            record = AadhaarAuthenticationMail()
            # next_siblings is a generator and is exhausted after one pass,
            # so materialise it as a list that can be iterated repeatedly.
            siblings = list(soup.find(id='demo').next_siblings)
            record.Auth_Modality = text_after('using', siblings)
            record.Date = first_match(date_pattern, siblings)
            record.Time = first_match(time_pattern, siblings)
            record.AUA_Name = text_after('deployed by', siblings)
            record.UIDAI_Response_Code = response_code(siblings)
            record.Authentication_Response = auth_outcome(siblings)
            return record

        def from_demo_and_body_siblings(soup):
            record = AadhaarAuthenticationMail()
            demo_siblings = list(soup.find(id='demo').next_siblings)
            body_siblings = list(soup.body.next_siblings)
            record.Auth_Modality = text_after('using', demo_siblings)
            record.Date = first_match(date_pattern, demo_siblings)
            record.Time = first_match(time_pattern, demo_siblings)
            record.AUA_Name = text_after('deployed by', body_siblings)
            record.UIDAI_Response_Code = response_code(body_siblings)
            record.Authentication_Response = auth_outcome(demo_siblings)
            return record

        with open(self.uri, 'rb') as file:
            msg = BytesParser(policy=policy.default).parse(file)
        msg_body = msg.get_body(preferencelist=('plain', 'html'))
        soup = BeautifulSoup(msg_body.get_content(), 'html.parser')

        attempts = (
            ('Data extracted method1: ', from_demo_siblings),
            ('Data extracted method2: ', from_demo_and_body_siblings),
        )
        for banner, extract in attempts:
            record = extract(soup)
            print(banner, vars(record))
            if record.isClean():
                return record
            print("Data Not clean")

コード例 #10

0

ファイルを表示

ファイル: delta_email_parser.py プロジェクト: mayailkour/android

# Captures whatever sits between "<" and a ">" that ends the string,
# e.g. the address part of a 'Name <user@example.org>' header value.
eMailQuery = re.compile(r'<(.+)>$')


def removeTraitors(traitorAddress, element):
    """Remove *traitorAddress* from an app entry's sender list.

    When the address is among ``element['senders']``, ``element['count']``
    is decremented by one and every occurrence of the address is dropped
    from the list.  The entry is modified in place.

    Args:
        traitorAddress: the sender address to remove.
        element: dict with at least the keys ``'senders'`` and ``'count'``.

    Returns:
        The same *element* dict.
    """
    if traitorAddress in element['senders']:
        element['count'] = element['count'] - 1
        # Compare by value: "is not" tests object identity, which leaves
        # equal-but-distinct strings in the list.
        element['senders'] = [
            x for x in element['senders'] if x != traitorAddress
        ]
    return element


# Walk over every stored mail, skip those without a plain-text body or a
# parsable sender, and drop senders that show up more than once.
for mail in filelist:
    with open(mail, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)
        # preferencelist must be a sequence of subtype names; ('plain')
        # without the trailing comma is just the string 'plain'.
        parsed = msg.get_body(preferencelist=('plain',))
        if parsed is None:
            continue
        text = parsed.get_content()

        # A missing From header yields None, which re.search cannot take.
        sender = re.search(eMailQuery, msg['From'] or '')
        if sender is None:
            continue
        if sender.groups()[0] in addresses:
            print('XXXXXX ---- We have a traitor: ', sender.groups()[0])
            for key, value in apps.items():
                # The return value was only rebound to the loop variable
                # and then discarded, so the call alone is equivalent.
                removeTraitors(sender.groups()[0], value)
            continue
        else:
            addresses.append(sender.groups()[0])
        info = re.search(query, text)

コード例 #11

0

ファイルを表示

from imaginary import magic_html_parser

# In a real program you'd get the filename from the arguments.
# The file is opened in a with-block so it is closed once parsing is done.
with open('outgoing.msg', 'rb') as fp:
    msg = BytesParser(policy=policy.default).parse(fp)

# Now the header items can be accessed as a dictionary, and any non-ASCII will
# be converted to unicode:
print('To:', msg['to'])
print('From:', msg['from'])
print('Subject:', msg['subject'])

# If we want to print a preview of the message content, we can extract whatever
# the least formatted payload is and print the first three lines.  Of course,
# if the message has no plain text part printing the first three lines of html
# is probably useless, but this is just a conceptual example.
simplest = msg.get_body(preferencelist=('plain', 'html'))
print()
print(''.join(simplest.get_content().splitlines(keepends=True)[:3]))

ans = input("View full message?")
# startswith() copes with an empty answer, where indexing [0] would raise
# IndexError.
if ans.lower().startswith('n'):
    sys.exit()

# We can extract the richest alternative in order to display it:
richest = msg.get_body()
partfiles = {}
if richest['content-type'].maintype == 'text':
    if richest['content-type'].subtype == 'plain':
        for line in richest.get_content().splitlines():
            print(line)
        sys.exit()

コード例 #12

0

ファイルを表示

ファイル: terms_extraction.py プロジェクト: as1mple/Search_terminology_words

    def get_text_with_eml(self) -> str:
        """Return the plain-text content of one ``.eml`` file in the working directory.

        The third entry of ``glob.glob('*.eml')`` is read, so at least three
        ``.eml`` files must be present.  NOTE(review): glob order is not
        guaranteed to be sorted - confirm which file is meant.

        Raises:
            IndexError: when fewer than three ``.eml`` files exist.
            AttributeError: when the message has no text/plain part.
        """
        file_list = glob.glob('*.eml')  # returns list of files
        with open(file_list[2], 'rb') as fp:  # select a specific email file from the list
            msg = BytesParser(policy=policy.default).parse(fp)
        # preferencelist must be a sequence of subtype names; ('plain')
        # without the trailing comma is just the string 'plain'.
        return msg.get_body(preferencelist=('plain',)).get_content()

コード例 #13

0

ファイルを表示

ファイル: mail-pop3.py プロジェクト: zhujidong/python-test

# Build an EmailMessage object from the byte string
msg = BytesParser(policy=default).parsebytes(mail_bytes)
# The bare string below keeps a disabled alternative. Its note says: this
# variant merely adds a bytes-to-str decoding step and is pointless.
'''
下面这种只是多了一个由字节解码为字符的过程，无意义
mail_str = b'\r\n'.join(mail_body).decode( 'utf_8' )
msg = Parser(policy=default).parsestr( mail_str )
'''

# Print subject, date, sender and Content-Type of the message.
print('邮件主题->> {}'.format(msg['Subject']))
print('日期->> {}'.format(msg['Date'].datetime))
# Comes back in the form: sender name <e-mail address>
print('发件人->> {}'.format(msg['From']))
print('主类型->> {}'.format(msg['Content-Type']))

# Get the MIME part, preferring 'related', then 'html', then 'plain'
text = msg.get_body(preferencelist=('related', 'html', 'plain'))
print(text.get_content())
'''
#返回的是 Address类的元组:
#(Address(display_name='zhujidong', username='******', domain='163.com'),)
print( msg['To'].addresses )

#访问元组的一个元素的值
print( msg['From'].addresses[0].addr_spec )
print( msg['To'].addresses[0].display_name )
print( msg['To'].addresses[0].username )
print( msg['To'].addresses[0].domain )

#深度优先顺序遍历信息对象树的所有部分和子部分
for part in msg.walk(): 
    print(part.get_content_type())

コード例 #14

0

ファイルを表示

def parseMails():
    """Tally app requests found in the e-mail files listed in ``filelist``.

    For every mail with a plain-text body and a parsable sender, the
    sender's request count in ``addresses`` is updated; senders that have
    already reached ``requestlimit`` are passed to ``removeGreedy`` for
    every app and the mail is skipped.  The body is then matched against
    ``appInfoQuery``:

    * on a match, the app's entry in ``apps`` is created or its count and
      sender list are updated, and its ``requestDate`` is raised to the
      mail's date when that is newer;
    * otherwise every known app key that occurs in the body is counted,
      and only a mail that matches no key at all is printed and appended
      to ``failedmail.txt``.
    """
    for mail in filelist:
        with open(mail, 'rb') as fp:
            # Convert Message to String
            msg = BytesParser(policy=policy.default).parse(fp)
            # preferencelist must be a sequence of subtype names; ('plain')
            # without the trailing comma is just the string 'plain'.
            parsed = msg.get_body(preferencelist=('plain',))
            # Skip if body is empty
            if parsed is None:
                continue
            emailBody = parsed.get_content()

            # Check if sender exists (a missing From header yields None,
            # which re.search cannot take)
            sender = re.search(eMailQuery, msg['From'] or '')
            if sender is None:
                continue
            senderAddress = sender.groups()[0]

            # check if sender crossed limit
            if senderAddress not in addresses:
                addresses[senderAddress] = 1
            elif addresses[senderAddress] == requestlimit:
                print('XXXXXX ---- We have a greedy one: ', senderAddress)
                for key, value in apps.items():
                    # The return value was only rebound to the loop
                    # variable and discarded, so the call alone suffices.
                    removeGreedy(senderAddress, value)
                continue
            else:
                addresses[senderAddress] += 1

            appInfo = re.search(appInfoQuery, emailBody)

            # AppInfo could not automatically be extracted
            if appInfo is None:
                # Search for String appearance of existing ComponentInfos in E-Mail body
                matched = False
                for key in apps:
                    if key in emailBody:
                        apps[key]['count'] += 1
                        apps[key]['senders'].append(senderAddress)
                        matched = True
                # The former trailing "continue" in the loop above was a
                # no-op, so mails that were counted were still reported as
                # unhandled; report only the ones that matched nothing.
                if not matched:
                    print('\n/// The following message could not be handled:\n',
                          sender, emailBody, '\n')
                    with open('failedmail.txt', 'a', encoding='utf-8') as fileTwo:
                        fileTwo.write('\n----------------------------\n')
                        fileTwo.write(emailBody)
            else:
                tempDict = appInfo.groupdict()
                component = tempDict['ComponentInfo']
                if component in apps:
                    apps[component]['count'] += 1
                    apps[component]['senders'].append(senderAddress)
                else:
                    tempDict['count'] = 1
                    tempDict['senders'] = [senderAddress]
                    apps[component] = tempDict
                #Update date of last request
                dateHeader = msg['Date']
                parsedDate = parsedate(dateHeader) if dateHeader else None
                # A missing or unparsable Date header gives no timestamp to
                # compare, so the stored requestDate is left untouched.
                if parsedDate is not None:
                    requestDate = mktime(parsedDate)
                    entry = apps[component]
                    if ('requestDate' not in entry
                            or entry['requestDate'] < requestDate):
                        entry['requestDate'] = requestDate

コード例 #15

0

ファイルを表示

ファイル: pipetobrowser.py プロジェクト: cinghiopinghio/config-files

# The message arrives on stdin, so read all of it before stdin is re-pointed.
raw = sys.stdin.buffer.read()

# stdin was a pipe: reattach file descriptor 0 to the terminal so that
# later interactive input still works.
if not os.isatty(0):
    # os.open() raises OSError on failure; it never returns a negative
    # descriptor, so the error has to be caught rather than tested for.
    try:
        fd = os.open('/dev/tty', os.O_RDONLY)
    except OSError:
        sys.stderr.write('Unable to open an input tty.\n')
        sys.exit(-1)
    os.dup2(fd, 0)
    os.close(fd)

msg = BytesParser(policy=policy.default).parsebytes(raw)

# We can extract the richest alternative in order to display it:
richest = msg.get_body()
partfiles = {}
if richest['content-type'].maintype == 'text':
    if richest['content-type'].subtype == 'plain':
        for line in richest.get_content().splitlines():
            print(line)
        sys.exit()
    elif richest['content-type'].subtype == 'html':
        body = richest
    else:
        print("Don't know how to display {}".format(richest.get_content_type()))
        sys.exit()
elif richest['content-type'].content_type == 'multipart/related':
    # preferencelist must be a sequence of subtype names; ('html') without
    # the trailing comma is just the string 'html'.
    body = richest.get_body(preferencelist=('html',))
    for part in richest.iter_attachments():
        fn = part.get_filename()

コード例 #16

0

ファイルを表示

ファイル: check_email.py プロジェクト: ak545/monitoring-importan-emails

def main():
    try:
        Path(EML_PATH).mkdir(parents=True, exist_ok=True)
    except:
        # print(f'Error creating folder: {FLR}{EML_PATH}')
        print(f'Ошибка создания папки: {FLR}{EML_PATH}')
        sys.exit(-1)

    try:
        Path(EML_PATH_READY).mkdir(parents=True, exist_ok=True)
    except:
        # print(f'Error creating folder: {FLR}{EML_PATH_READY}')
        print(f'Ошибка создания папки: {FLR}{EML_PATH_READY}')
        sys.exit(-1)

    letters_on_the_server_list = []
    letters_on_the_cache_list = []

    # Создать список файлов в кэше
    # Create Cached File List
    for (_, _, filenames) in os.walk(EML_PATH):
        for i in filenames:
            filename, file_extension = os.path.splitext(i)
            if '.eml' in file_extension:
                letters_on_the_cache_list.append(filename)
        break

    # Начало...
    # Begin...

    # Подключение к IMAP4 серверу
    # Connect to IMAP4 server
    mail = imaplib.IMAP4_SSL(CONTROLLED_EMAIL_SERVER)
    try:
        r, data = mail.login(CONTROLLED_EMAIL_ADDRESSES,
                             CONTROLLED_EMAIL_ADDRESSES_PASSWORD)
        if r != "OK":
            str_e = str(data)
            str_e = str_e.strip("b'").strip("'")
            # print(f'{FLR}Error login        : {str_e}')
            print(f'{FLR}Ошибка подключения : {str_e}')
    except (imaplib.IMAP4.error, OSError) as e:
        str_e = str(e)
        str_e = str_e.strip("b'").strip("'")
        # print(f'{FLR}Error login        : {str_e}')
        print(f'{FLR}Ошибка подключения : {str_e}')
        sys.exit(-1)

    # Получить список каталогов "INBOX", "Sent", и т.п.
    # Get the list of catalogs "INBOX", "Sent", etc.
    mail.list()

    # Переходим в папку INBOX
    # Go to the INBOX folder
    _, select_data = mail.select('INBOX')
    select_data[0].decode('utf-8')

    # Получить список id писем через пробел
    # Get the list id of letters through a space
    _, data = mail.search(None, 'ALL')
    ids = data[0]
    id_list = ids.split()
    count = len(id_list)

    print(f'{SR}')
    print('{:-<80}'.format(''))
    # print(f'Start scan         : {FLC}{"{:%d.%m.%Y %H:%M:%S}".format(datetime.now())}')
    print(
        f'Начало сканирования: {FLC}{"{:%d.%m.%Y %H:%M:%S}".format(datetime.now())}'
    )
    # print(f'Total letters      : {FLG}{count}')
    print(f'Всего писем        : {FLG}{count}')
    print('{:-<80}'.format(''))

    count_found = 0

    if count > 0:
        # Анализ имеющихся писем
        # Analysis of available letters
        for item in id_list:
            email_id = item.decode('utf-8').strip()

            if email_id == '':
                continue

            # Получить письмо
            # Флаг "Невидимый" не сбрасывается
            # Get a letter
            # "Unseen" flag is not reset
            _, data = mail.fetch(email_id, '(BODY.PEEK[])')

            # Необработанное содержимое письма
            # Raw message content
            raw_email = data[0][1]

            # Парсинг содержимого письма
            # Parsing the contents of the letter
            msg = email.message_from_bytes(raw_email,
                                           _class=email.message.EmailMessage)

            # Получить дату письма
            # Get the date of the letter
            str_date = ''
            if msg['Date'] is not None:
                timestamp = email.utils.parsedate_tz(msg['Date'])
                year, month, day, hour, minute, second = timestamp[:6]

                str_date = '{0:02d}.'.format(day)
                str_date += '{0:02d}.'.format(month)
                str_date += '{0:04d} '.format(year)
                str_date += '{0:02d}:'.format(hour)
                str_date += '{0:02d}:'.format(minute)
                str_date += '{0:02d}'.format(second)

            # Получить адрес отправителя письма
            # Get the sender address
            msg_from_decoded = ''
            if msg['From'] is not None:
                str_from = str(msg["From"])
                if '=?' in str_from.strip():
                    msg_from_decoded = str(make_header(
                        decode_header(str_from)))
                else:
                    msg_from_decoded = str_from

                msg_from_decoded = (msg_from_decoded.replace("\n", "").replace(
                    "\r", "").replace("\t", "").strip())

            # Получить декодированную тему письма
            # Get a decoded letter subject
            subj = ""
            if msg["Subject"]:
                str_subj = str(msg["Subject"])
                if '=?' in str_subj.strip():
                    subj = str(make_header(decode_header(str_subj)))
                else:
                    subj = str_subj

                subj = (subj.replace("\n", "").replace("\r",
                                                       "").replace("\t",
                                                                   "").strip())

            # Анализ данных письма
            # Analysis of the letter data
            is_important_letter = False
            for control_email in CONTROLLED_EMAIL_ADDRESSES_SENDERS:
                # Является ли письмо важным?
                # (проверяем, имеется ли адрес отправителя или фрагмент
                # адреса отправителя в списке отслеживаемых важных
                # писем CONTROLLED_EMAIL_ADDRESSES_SENDERS)

                # Is the letter important?
                # (check if the sender's address or the fragment of the
                # sender's address is in the list of monitored important
                # letters CONTROLLED_EMAIL_ADDRESSES_SENDERS)
                if control_email in msg_from_decoded:
                    # Если да, устнавливаем флаг важности письма
                    # If yes, set the letter importance flag
                    is_important_letter = True
                    break

            if is_important_letter:
                # Если письмо ВАЖНОЕ
                # If the letter is IMPORTANT

                # Дата и время обнаружения
                # Date and time of discovery
                date_time_discovery = '{:%d.%m.%Y %H:%M:%S}'.format(
                    datetime.now())

                count_found += 1

                # Установить флаг "Уведомления уже отправлялись"
                # Set the flag "Notifications have already been sent"
                is_notifications_have_already_been_sent = True

                # Message-ID письма
                # Message-ID of the letter
                str_domain = msg_from_decoded.split('@')[-1].strip('>').strip()
                message_id = f'{str_date.replace(":", ".")}@{str_domain}'
                message_id = sanitize_filename(message_id)

                print(f'From               : {FLG}{msg_from_decoded}')
                print(f'Date               : {FLG}{str_date}')
                print(f'Subject            : {FLG}{subj}')

                # Добавить ID письма в список "письма на сервере"
                # Add letter ID to the list of "letters on the server"
                letters_on_the_server_list.append(message_id)

                # Сохранить оригинал письма в .EML формате в
                # папку кэша (если его там ещё нет)
                # Save the original letter in .EML format to
                # the cache folder (if it is not already there)
                eml_file = EML_PATH + message_id + '.eml'

                if not Path(eml_file).is_file():
                    # Сбросить флаг "Уведомления уже отправлялись"
                    # Unset the flag "Notifications have already been sent"
                    is_notifications_have_already_been_sent = False

                    # Сохранить оригинал письма в формате .EML в
                    # папке кэша
                    # Save the original letter in the .EML format in
                    # the cache folder
                    with open(eml_file, 'wb+') as file:
                        file.write(raw_email)

                    # Добавить ID письма в список "письма в кэше"
                    # Add letter ID to the list "letters on the cache"
                    letters_on_the_cache_list.append(message_id)

                if is_notifications_have_already_been_sent:
                    # Если уведомления уже отправлялись,
                    # повторно получателей не уведомлять
                    # If notifications have already been sent,
                    # do not notify recipients again
                    # print(f'\nSkipped            : {FLY}{message_id}{FR}\n'
                    #       f'Cause              : {FLY}Notifications for '
                    #       f'this email have already been sent')
                    print(f'\nПропускается       : {FLY}{message_id}{FR}\n'
                          f'Причина            : {FLY}Уведомления по этому '
                          f'письму ранее уже отправлялись')
                    print('{:-<80}'.format(''))
                    continue

                warning_msg = f'From   : {msg_from_decoded}\n'
                warning_msg += f'Date   : {str_date}\n'
                warning_msg += f'Subject: {subj}\n'

                email_msg = warning_msg

                if Path(eml_file).is_file():
                    # Читать сырой текст оригинального
                    # письма из файла в кэше
                    # Read the raw text of the original
                    # letter from the file in the cache
                    with open(eml_file, 'rb+') as file:
                        eml_msg = BytesParser(
                            policy=policy.default).parse(file)

                    # Конвертировать сырой текст письма в читаемый текст
                    # Convert raw letter text to readable text
                    eml_text_part = ''
                    eml_text_part_b = eml_msg.get_body()
                    if eml_text_part_b is not None:
                        eml_text_part = eml_text_part_b.get_content()
                        if eml_text_part is not None:
                            eml_text_part = re.sub(r'<br.*?>', '\n',
                                                   eml_text_part)
                            eml_text_part = re.sub(r'<.*?>', '', eml_text_part)

                            # Ограничить длину текста (для показа только
                            # фрагмента текста в Telegram-чате)
                            # Limit the length of the text (to display only
                            # a fragment of the text in the Telegram-chat)
                            eml_text_part = str(eml_text_part)[:142].strip()

                    if eml_text_part is not None and eml_text_part != '':
                        warning_msg += '{:-<8}\n'.format('')
                        # warning_msg += 'Summary           :\n'
                        warning_msg += 'Краткое содержание:\n'
                        warning_msg += '{:-<8}\n'.format('')
                        warning_msg += eml_text_part + '...\n'

                warning_msg += '{:-<8}\n'.format('')
                # warning_msg += 'This event applies to all!\n'
                # warning_msg += 'See the full text of the letter in your email.'
                warning_msg += 'Это событие касается всех!\n'
                warning_msg += 'Полный текст письма смотрите в своей почте.'

                # Установить флаг "Отправить полное telegram-уведомление"
                # Set the flag "Send full telegram notification"
                is_send_full_telegram_notification = True

                # Обнулить список получателей для
                # полного Telegram-уведомления
                # Zero the list of recipients for
                # a full Telegram notification
                recepints_for_full_telegram_notification_list = []

                # Обнулить список получателей для
                # неполного Telegram-уведомления
                # Zero the list of recipients for
                # incomplete Telegram notifications
                recepints_for_incomplete_telegram_notification_list = []

                # Отправка персональных Email-уведомлениий получателям
                # Send personal email notifications to recipients
                for recipient_data in RECIPIENTS_FULL.items():
                    # Email получателя
                    # Email of recipient
                    recepient_email = recipient_data[0]

                    # Имя получателя
                    # Name of recipient
                    recepient_name = recipient_data[1][0]

                    if recepient_name.strip() == '':
                        # Если имя получателя отсутствует
                        # If the recipient's name is missing
                        # recepient_name = 'Unknown'
                        recepient_name = 'Вася Пупкин'

                    # Список адресов входящих писем или их фрагментов,
                    # запрещенных для этого получателя
                    # The list of addresses of incoming letters or their
                    # fragments prohibited for this recipient
                    prohibited_email_list = recipient_data[1][1]

                    # Находится ли данное письмо в списке адресов входящих
                    # писем, запрещённых для этого получателя?
                    # Is this letter in the list of addresses of incoming
                    # emails prohibited for this recipient?
                    is_prohibited = False
                    prohibited_part = ''
                    for prohibited_email in prohibited_email_list:
                        if prohibited_email in msg_from_decoded:
                            is_prohibited = True
                            prohibited_part = prohibited_email
                            break

                    # Создать список получателя для def send_email()
                    # Create a recipient list for def send_email ()
                    # to_list[0] - email получателя, email of recipient
                    # to_list[1] - имя получателя, name of recipient
                    to_list = [recepient_email, recepient_name]

                    if not is_prohibited:
                        # По этому письму для данного получателя МОЖНО
                        # отправить Email уведомление
                        # By this letter for this recipient you can
                        # send an email notification

                        # Отправить email-уведомление получателю
                        # Send email to recipient
                        send_email(email_msg,
                                   to_list,
                                   attached_file=eml_file,
                                   date_time=date_time_discovery,
                                   subject=subj)

                        # Добаить имя получателя в список получателей
                        # полного уведомления в Telegram-чате
                        # Add the recipient name to the list of recipients
                        # of the full notification in the Telegram chat
                        recepints_for_full_telegram_notification_list.append(
                            to_list[1])
                    else:
                        # По этому письму для данного получателя ЗАПРЕЩЕНО
                        # отправить Email уведомление
                        # For this recipient, it is FORBIDDEN
                        # to send an Email Notification

                        # Добавить имя получателя в список получателей
                        # неполного уведомления в Telegram-чате
                        # Add recipient name to the list of recipients
                        # of incomplete notification in Telegram chat
                        recepints_for_incomplete_telegram_notification_list.append(
                            to_list[1])

                        # Сбросить флаг "Отправить полное telegram-уведомление"
                        # Unset the flag "Send full telegram notification"
                        is_send_full_telegram_notification = False
                        # print(f'\nSkipped letter     : {FLR}{to_list[0]} ({to_list[1]}){FR}\n'
                        #       f'Cause              : Incoming letter {FLG}{msg_from_decoded}{FR} '
                        #       f'is in the list of prohibited '
                        #       f'for this recipient ({FLR}{prohibited_part}{FR})')
                        print(
                            f'\nПропускается письмо: {FLR}{to_list[0]} ({to_list[1]}){FR}\n'
                            f'Причина            : Входящее письмо {FLG}{msg_from_decoded}{FR} '
                            f'находится в списке запрещённых для '
                            f'этого получателя ({FLR}{prohibited_part}{FR})')
                        print('{:-<80}'.format(''))

                # Отправка общего уведомления в Telegram-чат
                # Sending general notification to Telegram chat
                if is_send_full_telegram_notification:
                    # Отправка полного уведомления в Telegram-чат
                    # Sending full notification to Telegram chat
                    send_telegram(warning_msg, date_time=date_time_discovery)
                else:
                    # Отправка неполного уведомления в Telegram-чат
                    # Sending an incomplete notification to Telegram chat
                    warning_msg = ''
                    for name in recepints_for_full_telegram_notification_list:
                        warning_msg += name + '\n'
                    warning_msg += '{:-<8}\n'.format('')
                    # warning_msg += 'This event is only for recipients listed above!\n'
                    # warning_msg += 'E-mail notification has been sent to all of you.\n'
                    # warning_msg += 'The full text of the letter can be viewed in your email.\n'
                    warning_msg += 'Это событие только для получателей, перечисленных выше!\n'
                    warning_msg += 'Всем вам отправлено уведомление по e-mail.\n'
                    warning_msg += 'Полный текст письма можно посмотреть в своей почте.\n'

                    warning_msg += '\n{:-<8}\n'.format('')

                    # Добавить в Telegram-уведомление имена получателей,
                    # которых данное письмо не касается
                    # Add to the Telegram-notification the names of recipients
                    # whom this letter does not concern.
                    for name in recepints_for_incomplete_telegram_notification_list:
                        warning_msg += name + '\n'

                    warning_msg += '{:-<8}\n'.format('')
                    # warning_msg += 'This event has nothing to do with you.!'
                    warning_msg += 'Это событие не имеет к вам ' \
                                   'никакого отношения!'

                    # Отправка неполного уведомления в Telegram-чат
                    # Sending an incomplete notification to Telegram chat
                    send_telegram(warning_msg, date_time=date_time_discovery)

        mail.close()

    # Отключение от IMA4-сервера
    # Disconnect from IMA4 server
    mail.logout()

    # Подчистить по необходимости кэш
    # Clean up by need cache
    for id_on_the_cache in letters_on_the_cache_list:
        if id_on_the_cache not in letters_on_the_server_list:
            # Если письмо находится в кэше, но на IMA4-сервере
            # в папке "INBOX" его уже нет
            # If the message is in the cache, but on the
            # IMA4 server in the "INBOX" folder it is no longer there

            # EML-файл, подлежащий удалению из кэша
            # EML file to be removed from the cache
            f_eml_src = EML_PATH + id_on_the_cache + '.eml'

            # ZIP-файл, подлежащий перемещению в папку истории
            # ZIP file to be moved to the history folder
            f_zip_src = EML_PATH + id_on_the_cache + '.zip'
            f_zip_dst = EML_PATH_READY + id_on_the_cache + '.zip'

            try:
                # Удаление EML-файла из кэша
                # Remove EML file from cache
                # print(f'Remove             : {f_eml_src}')
                print(f'Удаление           : {f_eml_src}')
                if os.path.isfile(f_eml_src):
                    os.remove(f_eml_src)
            except OSError as e:
                # print(f'Error deleting file {FLR}{f_eml_src}')
                print(f'Ошибка удаления файла {FLR}{f_eml_src}')
                print(f'{FLR}{e.filename}{FR}: {FLR}{e.strerror}')
                sys.exit(-1)

            try:
                # Перемещение ZIP-файла в папку истории (ready)
                # Move the ZIP file to the history folder (ready)
                # print(f'Move               : {f_zip_src} в {f_zip_dst}')
                print(f'Перемещение        : {f_zip_src} в {f_zip_dst}')
                if os.path.isfile(f_zip_src):
                    os.rename(f_zip_src, f_zip_dst)
            except OSError as e:
                # print(f'Error moving file {FLR}{f_zip_src}{FR} в {FLR}{f_zip_dst}')
                print(
                    f'Ошибка перемещения файла {FLR}{f_zip_src}{FR} в {FLR}{f_zip_dst}'
                )
                print(f'{FLR}{e.filename}{FR}: {FLR}{e.strerror}')
                sys.exit(-1)

    # Печать статистики
    # Printing statistics
    if count_found > 0:
        print('\n{:-<80}'.format(''))

    # print(f'Total important    : {FLG}{count_found}')
    # print(f'End of scan        : {FLC}{"{:%d.%m.%Y %H:%M:%S}".format(datetime.now())}')
    print(f'Из них важных      : {FLG}{count_found}')
    print(
        f'Конец сканирования : {FLC}{"{:%d.%m.%Y %H:%M:%S}".format(datetime.now())}'
    )
    print('{:-<80}\n'.format(''))
    print(f'{SR}')

コード例 #17

0

ファイルを表示

ファイル: Some.py プロジェクト: SanamAhmed/curious_henry

def upload_file_():
    """Store an uploaded document, run entity extraction on it and record it.

    Reads ``file``, ``pname`` and ``lang`` from the current request. For a
    POST with an allowed file name the upload is saved under
    ``UPLOAD_PATH_PDF``, converted according to its extension (``.docx``,
    ``.txt``, ``.msg``; anything else goes through the PDF path), handed to
    ``EntityExtractor`` and inserted into the ``Project_Files`` table.

    Returns:
        The rendered ``addfiles.html`` page carrying a status message, or a
        redirect when the file name is empty. Any exception is printed with
        its traceback and reported on the same page. Nothing is returned
        for a non-POST request.
    """
    # Imported locally so this block does not rely on a file-level import.
    import subprocess

    try:
        print("Here in uploader")
        file = request.files['file']
        pname = request.form['pname']
        print(file)
        print(pname)

        if request.method == 'POST':
            file = request.files['file']
            pname = request.form['pname']
            lang = request.form['lang']

            if file.filename == '':
                print("file name is empty")
                # NOTE(review): every other exit renders the 'addfiles.html'
                # template; confirm that url_for() really has an endpoint
                # with this name.
                return redirect(url_for('addfiles.html',
                                        message='No selected file'))
            if file and allowed_file(file.filename):
                upload_dir = app.config['UPLOAD_PATH_PDF']
                filename = secure_filename(file.filename)
                # NOTE(review): the upload is saved under the sanitised
                # `filename`, while most paths below use the raw
                # `file.filename`; the two differ whenever secure_filename()
                # alters the name. Confirm which one is intended.
                print("file",file.filename)

                def make_extractor(source_path, url):
                    """Build an EntityExtractor for source_path, viewed at url."""
                    return EntityExtractor(
                        lang, source_path, pname.strip("'"), url,
                        app.config['GOOGLE_API_KEY'],
                        app.config['NLP_API_KEY'],
                        app.config["DATABASEIP"], app.config["DB_USER"],
                        app.config["DB_PASSWORD"], app.config["DATABASE"],
                        file.filename)

                file.save(os.path.join(upload_dir, filename))
                datetime_now = datetime.datetime.now()
                formatted_date = datetime_now.strftime('%Y-%m-%d')
                db = pymysql.connect(app.config["DATABASEIP"], app.config["DB_USER"], app.config["DB_PASSWORD"],
                                     app.config["DATABASE"])
                # try/finally: the connection is released even when the
                # conversion or extraction below raises.
                try:
                    cur = db.cursor()
                    sql = ('INSERT INTO Project_Files (FileName,ProjectName,ProjectUserID,'
                           'UploadDate,UploadPath,Nodes,Edges,FileEntities,URL) '
                           'VALUES (%s,%s, %s ,%s,%s,%s,%s,%s,%s)')
                    entityExtractor_ = None
                    document_url = None
                    if ".docx" in file.filename:
                        with open(upload_dir + file.filename, "rb") as docx_file:
                            result = mammoth.convert_to_html(docx_file)
                            html = result.value  # The generated HTML

                        temp = file.filename.replace(".docx", "")
                        with open(upload_dir + temp + ".html", "w") as html_file:
                            html_file.write(html)
                        document_url = "http://george.runmy.tech:5000/static/web/" + temp + ".html"

                        entityExtractor_ = make_extractor(
                            upload_dir + file.filename, document_url)
                        entityExtractor_.getEntityDocxJson()

                    elif ".txt" in file.filename:
                        temp = file.filename.replace(".txt", "")
                        html = "<html></html>"
                        soup = BeautifulSoup(html)
                        htmltag = soup.find('html')
                        body = soup.new_tag("body")
                        with open(upload_dir + file.filename, "r") as myfile:
                            data = myfile.read()
                        # One <p> element per blank-line separated paragraph.
                        paras = data.split("\n\n")
                        for para in paras:
                            html = "<p></p>"
                            souppara = BeautifulSoup(html)
                            ptag = souppara.find('p')
                            ptag.insert(0, NavigableString(para))
                            body.append(ptag)
                        htmltag.append(body)
                        html_page = soup.prettify("utf-8")
                        with open(upload_dir + temp + ".html", "wb+") as filewriter:
                            filewriter.write(html_page)
                        document_url = "http://george.runmy.tech:5000/static/web/" + temp + ".html"
                        entityExtractor_ = make_extractor(
                            upload_dir + filename, document_url)
                        entityExtractor_.getEntityTxtJson()

                    elif ".msg" in file.filename:
                        print('cd '+upload_dir+'; '+'msgconvert '+file.filename)
                        # Argument list + cwd instead of a shell string: the
                        # file name comes from the request and must never be
                        # interpreted by a shell.
                        subprocess.run(['msgconvert', file.filename],
                                       cwd=upload_dir)
                        with open(upload_dir + file.filename + '.eml',
                                  'rb') as fp:  # the .eml produced by msgconvert
                            msg = BytesParser(policy=policy.default).parse(fp)
                        # preferencelist must be a sequence of subtypes, hence
                        # the one-element tuple.
                        data = msg.get_body(preferencelist=('plain',)).get_content()

                        print(data)
                        temp = file.filename.replace(".msg", "")
                        html = "<html></html>"
                        soup = BeautifulSoup(html)
                        htmltag = soup.find('html')
                        body = soup.new_tag("body")
                        paras = data.split("\n\n")
                        for para in paras:
                            html = "<p></p>"
                            souppara = BeautifulSoup(html,features="lxml")
                            ptag = souppara.find('p')
                            ptag.insert(0, NavigableString(para))
                            body.append(ptag)
                        htmltag.append(body)
                        html_page = soup.prettify("utf-8")
                        with open(upload_dir + temp + ".html", "wb+") as filewriter:
                            filewriter.write(html_page)
                        with open(upload_dir + temp + ".txt", "w+") as filewriter:
                            print("Converting .msg into Text")
                            filewriter.write(data)
                            print("Converted .msg into Text")

                        document_url = "http://george.runmy.tech:5000/static/web/" + temp + ".html"
                        entityExtractor_ = make_extractor(
                            upload_dir + temp + ".txt", document_url)
                        print("Stucked")
                        entityExtractor_.getEntityTxtJson()

                    else:
                        temp = file.filename.replace(".pdf", "")
                        document_url = "http://george.runmy.tech:5000/static/web/viewer.html?file=" + file.filename
                        entityExtractor_ = make_extractor(
                            upload_dir + file.filename, document_url)
                        searchable = entityExtractor_.isSearchablePDF()
                        if searchable:
                            entityExtractor_.getEntityPDFJson()
                        else:
                            # OCR
                            print("Have to do OCR")

                            document_url = "http://george.runmy.tech:5000/static/web/viewer.html?file=" + file.filename
                            OCR.pdf_splitter(upload_dir + filename, upload_dir + temp+".txt", app.config['OCR_API_KEY'])
                            entityExtractor_ = make_extractor(
                                upload_dir + temp + ".txt", document_url)
                            entityExtractor_.getEntityTxtJson()

                    print(entityExtractor_.getEntities())
                    print("some stuff")
                    args = (file.filename, pname.strip("'"), session['user'].strip("'"), formatted_date.strip("'"),
                            upload_dir,entityExtractor_.getNodesList(),entityExtractor_.getEdgeList(),entityExtractor_.getEntities(),document_url)

                    # Every branch above assigned the extractor, so it can be
                    # released unconditionally before the insert.
                    del entityExtractor_

                    # Execute the SQL command
                    cur.execute(sql, args)
                    # Commit your changes in the database
                    db.commit()
                finally:
                    db.close()
                return render_template('addfiles.html', email=session['user'],projectList=session['projectList'],message="File is successfully uploaded and processed")
            else:
                return render_template('addfiles.html',email=session['user'],projectList=session['projectList'],
                                        message='File Extension not allowed')

    except Exception as e:
        print("Error is here soooooooooo" + str(e))
        # Positional arguments: the `etype` keyword was removed in Python 3.10.
        print(''.join(traceback.format_exception(type(e), e, e.__traceback__)))
        return render_template('addfiles.html', projectList=session['projectList'],email=session['user'],message='Exception in file processing')

コード例 #18

0

ファイルを表示

def get_eml_body(eml_file):
    """Return the plain-text body of the message stored in *eml_file*.

    Args:
        eml_file: path of an RFC 822 (``.eml``) file.

    Returns:
        The decoded ``text/plain`` body, or an empty string when the
        message has no plain-text part.
    """
    with open(eml_file, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)
    # preferencelist must be a sequence of subtypes; ('plain') is just the
    # string 'plain', so a one-element tuple is spelled out here.
    body = msg.get_body(preferencelist=('plain',))
    if body is None:
        return ''
    return body.get_content()

コード例 #19

0

ファイルを表示

ファイル: mailbox.py プロジェクト: BorissowT/lab-grader

def process_students(imap_conn):
    """
    Collect student submissions from the unread messages in INBOX.

    Every unseen message is fetched by UID and parsed; some of its headers
    are printed, and the first three non-empty text lines of its body are
    read as group, student name and repository name.

    Args:
        imap_conn: connection object exposing ``select`` and ``uid`` in the
            style of ``imaplib.IMAP4`` (assumed to be logged in already --
            TODO confirm against the caller).

    Returns:
        A list of dicts with the keys ``group``, ``raw_group``, ``name``,
        ``github``, ``email`` and ``uid``; ``None`` when the mailbox cannot
        be opened, the search fails or a message cannot be fetched.
    """
    rv, data = imap_conn.select("INBOX")
    if rv != 'OK':
        print("ERROR: Unable to open mailbox ", rv)
        # Without a selected mailbox the search below cannot succeed.
        return

    rv, data = imap_conn.uid('search', None, "(UNSEEN)")
    if rv != 'OK':
        print("No messages found!")
        return

    # list of students
    students = []

    for uid in data[0].split():
        # A separate name keeps the search result from being shadowed.
        rv, fetched = imap_conn.uid('fetch', uid, '(RFC822)')
        if rv != 'OK':
            print("ERROR getting message {}".format(uid))
            return

        # see https://docs.python.org/3/library/email.examples.html for an email processing example
        msg = BytesParser(policy=policy.default).parsebytes(fetched[0][1])
        subject = msg['subject']
        print('Message {}: {}'.format(uid, subject))
        print('Raw Date: {}'.format(msg['Date']))
        # Now convert to local date-time
        date_tuple = email.utils.parsedate_tz(msg['Date'])
        if date_tuple:
            local_date = datetime.datetime.fromtimestamp(
                email.utils.mktime_tz(date_tuple))
            print("Local Date:",
                  local_date.strftime("%a, %d %b %Y %H:%M:%S"))

        # Take the least formatted body part available. A message with
        # neither a plain-text nor an HTML part yields None; it is treated
        # as an empty body so it is reported by the error branch below.
        simplest = msg.get_body(preferencelist=('plain', 'html'))
        simplest_text = simplest.get_content() if simplest is not None else ''

        soup = BeautifulSoup(simplest_text, features="lxml")
        # kill all script and style elements
        for script in soup(["script", "style"]):
            script.extract()  # rip it out
        # get text
        text = soup.get_text(separator='\n')
        # break into lines and remove leading and trailing space on each
        lines = (line.strip() for line in text.splitlines())
        # break multi-headlines into a line each
        chunks = (phrase.strip() for line in lines
                  for phrase in line.split("  "))
        # drop blank lines
        text_chunks = [chunk for chunk in chunks if chunk]
        if len(text_chunks) >= 3:
            print("Group: {}".format(text_chunks[0]))
            print("Name: {}".format(text_chunks[1]))
            print("Repo name: {}".format(text_chunks[2]))
            # make uppercase and swap all valid non-numeric characters to english
            group = text_chunks[0].upper().replace('М', 'M').replace(
                'В', 'V').replace('З', 'Z').replace('К', 'K')
            # remove all invalid characters
            group = ''.join([c for c in group if c in '0123456789MVZK'])
            # normalize unicode string
            # e.g. substitute non-breaking space ('\xa0')
            # with normal space; see https://stackoverflow.com/a/34669482
            name = unicodedata.normalize("NFKC", text_chunks[1])
            students.append({
                'group': "'{}'".format(group),
                'raw_group': text_chunks[0],
                'name': name,
                'github': text_chunks[2],
                'email': msg['from'],
                'uid': uid
            })
        else:
            print(
                "Error! Unable to parse email body. There should be at least 3 lines of text in the email."
            )
        print("")
    return students

コード例 #20

0

ファイルを表示

ファイル: FileProcessor.py プロジェクト: SanamAhmed/curious_henry

    def process(self, projectFile_):
        """Convert one project file for viewing and run entity extraction on it.

        The branch is chosen by substring match on ``projectFile_.FileName``:
        ``.docx`` is converted to HTML with mammoth, ``.txt`` and ``.msg``
        are wrapped paragraph by paragraph into an HTML page (``.msg`` is
        first converted to ``.eml`` with the external ``msgconvert`` tool),
        and anything else goes through the PDF path, falling back to OCR
        when the PDF is not searchable.

        Args:
            projectFile_: object providing ``FileName``, ``UploadPath``,
                ``ProjectName`` and ``lang``.

        The visible code ends without a ``return`` statement, so the
        extractor that is built is not handed back to the caller.
        """
        # Imported locally so this block does not rely on a file-level import.
        import subprocess

        entityExtractor_ = None
        document_url = None
        upload_path = projectFile_.UploadPath
        file_name = projectFile_.FileName

        if ".docx" in file_name:
            with open(upload_path + file_name, "rb") as docx_file:
                result = mammoth.convert_to_html(docx_file)
                html = result.value  # The generated HTML

            temp = file_name.replace(".docx", "")
            # Write the markup exactly once so the saved page does not
            # contain the document twice.
            with open(upload_path + temp + ".html", "w") as html_file:
                html_file.write(html)
            document_url = self.uploadurl + temp + ".html"
            title = self.extractTitle(upload_path + temp + ".html",
                                      projectFile_.lang)
            print("Here in File Processor")
            print(self.uploadurl)
            print(document_url)
            entityExtractor_ = EntityAndRelationBuilder(
                projectFile_.lang,
                upload_path + file_name,
                projectFile_.ProjectName.strip("'"), document_url,
                file_name, title)
            entityExtractor_.getEntityDocxJson()

        elif ".txt" in file_name:
            temp = file_name.replace(".txt", "")
            html = "<html></html>"
            soup = BeautifulSoup(html)
            htmltag = soup.find('html')
            body = soup.new_tag("body")
            with open(upload_path + file_name, "r") as myfile:
                data = myfile.read()
            print(data)
            # One <p> element per blank-line separated paragraph.
            paras = data.split("\n\n")
            for para in paras:
                html = "<p></p>"
                souppara = BeautifulSoup(html)
                ptag = souppara.find('p')
                ptag.insert(0, NavigableString(para))
                body.append(ptag)
            htmltag.append(body)
            html_page = soup.prettify("utf-8")
            with open(upload_path + temp + ".html", "wb+") as filewriter:
                filewriter.write(html_page)
            document_url = self.uploadurl + temp + ".html"
            title = self.extractTitle(upload_path + temp + ".html",
                                      projectFile_.lang)
            print("Language is:" + projectFile_.lang)
            entityExtractor_ = EntityAndRelationBuilder(
                projectFile_.lang,
                upload_path + file_name,
                projectFile_.ProjectName.strip("'"), document_url,
                file_name, title)
            entityExtractor_.getEntityTxtJson()

        elif ".msg" in file_name:
            print('cd ' + upload_path + '; ' + 'msgconvert ' + file_name)
            # Argument list + cwd instead of a shell string, so the file
            # name is never interpreted by a shell.
            subprocess.run(['msgconvert', file_name], cwd=upload_path)
            eml_path = upload_path + file_name + '.eml'
            with open(eml_path, 'rb') as fp:  # the .eml produced by msgconvert
                msg = BytesParser(policy=policy.default).parse(fp)
            # preferencelist must be a sequence of subtypes, hence the
            # one-element tuple.
            data = msg.get_body(preferencelist=('plain',)).get_content()
            # The headers are re-read with the default Parser.
            with open(eml_path, 'r+') as fhp:
                headers = Parser().parse(fhp)
            print(headers["to"])
            print(headers["from"])
            print(headers["subject"])

            print(data)
            # NOTE(review): FileName is matched on ".msg" above, so stripping
            # ".msg.eml" looks like a no-op for a name ending in ".msg" --
            # confirm the intended output name.
            temp = file_name.replace(".msg.eml", "")
            html = "<html></html>"
            soup = BeautifulSoup(html)
            htmltag = soup.find('html')
            body = soup.new_tag("body")
            paras = data.split("\n\n")
            for para in paras:
                html = "<p></p>"
                souppara = BeautifulSoup(html, features="lxml")
                ptag = souppara.find('p')
                ptag.insert(0, NavigableString(para))
                body.append(ptag)
            htmltag.append(body)
            html_page = soup.prettify("utf-8")
            with open(upload_path + temp + ".html", "wb+") as filewriter:
                filewriter.write(html_page)
            with open(upload_path + temp + ".txt", "w+") as filewriter:
                print("Converting .msg into Text")
                filewriter.write(data)
                print("Converted .msg into Text")
            filename = upload_path + temp + ".txt"
            document_url = self.uploadurl + temp + ".html"
            entityExtractor_ = EmailRelationExtractor(
                projectFile_.lang, filename,
                projectFile_.ProjectName.strip("'"), document_url,
                file_name, headers["to"], headers["from"],
                headers["subject"])
            print("Stucked")
            entityExtractor_.getEntityTxtJson()

        else:
            temp = file_name.replace(".pdf", "")
            title = ""
            document_url = self.uploadurl + "viewer.html?file=" + file_name
            entityExtractor_ = EntityAndRelationBuilder(
                projectFile_.lang,
                upload_path + file_name,
                projectFile_.ProjectName.strip("'"), document_url,
                file_name, title)
            searchable = entityExtractor_.isSearchablePDF()
            if searchable:
                entityExtractor_.getEntityPDFJson()
            else:
                # OCR
                print("Have to do OCR")

                document_url = self.uploadurl + "viewer.html?file=" + file_name
                OCR_FileName = upload_path + temp + ".txt"
                OCR.pdf_splitter(
                    upload_path + file_name,
                    OCR_FileName, self.ocrapikey)
                title = self.extractTitleText(OCR_FileName, projectFile_.lang)
                entityExtractor_ = EntityAndRelationBuilder(
                    projectFile_.lang, OCR_FileName,
                    projectFile_.ProjectName.strip("'"), document_url,
                    file_name, title)
                entityExtractor_.getEntityTxtJson()

コード例 #21

0

ファイルを表示

def get_email(text):
    """Extract the address shown inside an ``Email: <a ...>...</a>`` link.

    The span between ``Email: `` and the last ``/a>`` on the line is isolated
    first; the address is then the text between the first ``>`` and the last
    ``<`` of that span.

    Args:
        text: HTML source to search.

    Returns:
        The extracted address as a string, or ``0`` when the markup is not
        found or ``text`` is not a string.
    """
    pattern = r"(?<=Email: )(.*)(?=\/a>)"
    second_pattern = r"(?<=>)(.*)(?=<)"
    try:
        outer = re.search(pattern, text)
        if outer is None:
            return 0
        inner = re.search(second_pattern, outer.group(1))
    except TypeError:
        # re.search() rejects non-string input; keep the "not found" result
        # instead of hiding every possible exception behind a bare except.
        return 0
    if inner is None:
        return 0
    return inner.group(1)


# Directory that holds the .eml files to scan.
path = './mails/'

eml_files = glob.glob(path + '*.eml')
data = []
for eml_file in eml_files:
    # The with-block closes the file; no explicit close() is needed.
    with open(eml_file, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)
    # get_body() expects a sequence of subtypes. ('html') is just the string
    # 'html', so spell out the one-element tuple.
    text = str(msg.get_body(preferencelist=('html',)))

    # Collect the findings. get_email() returns 0 when nothing was found;
    # get_name() presumably follows the same convention - confirm.
    name = get_name(text)
    email = get_email(text)
    if name != 0 and email != 0:
        data.append([name, email])

dataframe = pd.DataFrame(data, columns=["Names", "Emails"]).drop_duplicates()
dataframe.to_csv("data.csv", index=False)

コード例 #22

0

ファイルを表示

         f.close()
         print('Attachment found: ', part.get_filename())
 to = msg['to']
 fromEmail = msg['from']
 cc = msg['cc']
 subject = msg['subject']
 header = '<div style="background:white;"><b>From</b>: ' + fromEmail + '<br>'
 header += '<b>To</b>: ' + to + '<br>'
 if cc != None:
     header += '<b>CC</b>: ' + cc + '<br>'
 header += '<b>Subject</b>: ' + subject + '<br>'
 if len(attachmentNames) > 0:
     header += '<b>Attachment file name(s)</b>: ' + ', '.join(
         attachmentNames) + '<br>'
 header += '<br><hr><br></div>'
 simplest = msg.get_body(preferencelist=('html', 'plain')).get_content()
 simplest = header + '\n' + simplest
 pdfkit.from_string(simplest,
                    outputPath + '/' + fileName + '.pdf',
                    options=options)
 try:
     plainText = msg.get_body(preferencelist=('plain')).get_content()
     r.extract_keywords_from_text(plainText)
     keywords = r.get_ranked_phrases()[:10]
     doc = nlp(plainText)
     persons = dict(
         Counter([x.text for x in doc.ents if x.label_ == 'PERSON']))
     orgs = dict(
         Counter([x.text for x in doc.ents if x.label_ == 'ORG']))
     norp = dict(
         Counter([x.text for x in doc.ents if x.label_ == 'NORP']))

コード例 #23

0

ファイルを表示

ファイル: reader.py プロジェクト: ErikHorus1249/Python_scripts

import email
from email import policy
from email.parser import BytesParser
import glob
# file_list = glob.glob('*.eml') # returns list of files
file_name = "original_msg.eml"
with open(file_name, 'rb') as fp:  # select a specific email file from the list
    msg = BytesParser(policy=policy.default).parse(fp)
# get_body() expects a sequence of subtypes. ('plain') is only the string
# 'plain', so pass a real one-element tuple.
text = msg.get_body(preferencelist=('plain',)).get_content()

# footer = msg.get_all()
print(text)  # print the email content

コード例 #24

0

ファイルを表示

    async def __run(self):
        """Forward a mail read from STDIN to the hood's REST API.

        Resolves the recipient named in ``args.recipient``, checks that the
        sender is the single matching subscriber of the same hood, removes
        anything that looks like a markup tag from the body and POSTs the
        text to the API. Every path ends in ``exit()``: status 0 only when
        the API answers 201 Created, status 1 otherwise.
        """
        # extract email from the recipient
        email_name = args.recipient.lower()
        try:
            email = await Email.objects.get(name=email_name)
        except NoMatch:
            logger.error('No recipient with this name')
            exit(1)

        # read mail from STDIN and parse to EmailMessage object
        message = BytesParser(policy=default).parsebytes(stdin.buffer.read())

        # Prefer the Sender header and fall back to From.
        sender = message.get('sender') or message.get('from')
        if not sender:
            logger.error('No Sender or From header')
            exit(1)

        sender = parseaddr(sender)[1]
        if not sender:
            logger.error('Could not parse sender')
            exit(1)

        maybe_subscriber = await EmailSubscribers.objects.filter(
            email=sender).all()
        if (len(maybe_subscriber) != 1
                or maybe_subscriber[0].hood.id != email.hood.id):
            logger.error('Not a subscriber')
            exit(1)

        # get_body() returns None when the mail has neither a plain-text nor
        # an HTML part; reject such mails instead of crashing on None.
        body = message.get_body(preferencelist=('plain', 'html'))
        if body is None:
            logger.error('No plain-text or HTML body found')
            exit(1)

        # extract relevant data from mail
        text = sub(r'<[^>]*>', '', body.get_content())

        response = post(
            '%s/api/hoods/%d/email/messages/' %
            (config['root_url'], email.hood.pk),
            json={
                'text': text,
                'secret': email.secret
            },
        )
        if response.status_code == status.HTTP_201_CREATED:
            exit(0)
        elif response.status_code == status.HTTP_451_UNAVAILABLE_FOR_LEGAL_REASONS:
            logger.error('Message wasn\'t accepted: %s' % text)
        elif response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY:
            logger.error('Malformed request: %s' % response.json())
        elif response.status_code == status.HTTP_401_UNAUTHORIZED:
            logger.error(
                'Wrong API secret. kibicara_mda seems to be misconfigured')
        else:
            logger.error('REST-API failed with response status %d' %
                         response.status_code)
        exit(1)

コード例 #25

0

ファイルを表示

# SELECT Unsafe Login. Please contact [email protected] for help
# NOTE(review): the line above looks like the server error that the ID
# command below is meant to avoid - confirm.
# IMAP requires a tag in front of every command so that responses can be
# matched asynchronously, hence the call to imap._new_tag().
# The data sent is a byte string (hence the b prefix) and must end with \r\n,
# otherwise the server keeps waiting for the end of the command.
imap.send( b'%s ID ("name" "zgzxxbot" "version" "1.0" "vendor" "J.D.zhu")\r\n' % imap._new_tag() )

# The default argument is INBOX; the call returns the number of messages.
print( imap.select() )

# response is a list; its first element holds the space-separated message numbers.
status, response = imap.search(None, '(UNSEEN)') 
unread_msg_nums = response[0].split() 

# BODY[] is equivalent to RFC822, so the whole message content is returned.
_, response = imap.fetch( unread_msg_nums[0],  '(UID BODY[])' ) 

# Build an EmailMessage instance from the byte string.
# A BODY[HEADER] response can be turned into such a message instance as well.
msg = BytesParser(policy=default).parsebytes( response[0][1] )

print( msg['Subject'])
print( msg['Date'])
# Returned in the form: sender name <email address>
print( msg['From'] )
print( msg['To'] )
print( msg['Content-Type'])

# Extract the plain-text content.
print( msg.get_body('plain').get_content())

imap.logout()

コード例 #26

0

ファイルを表示

ファイル: check_email.py プロジェクト: ak545/monitoring-importan-emails

def send_email(message,
               recipient,
               attached_file=None,
               date_time=None,
               subject=None):
    """
    Send a notification e-mail about an important letter to one recipient.

    The mail carries a plain-text and an HTML part. When ``attached_file``
    exists, its body text (with HTML tags removed) is quoted in the
    notification and the file itself is attached as a zip archive created
    next to it. Errors raised while talking to the SMTP server are printed,
    not raised.

    :param message: str, text placed between the separator lines
    :param recipient: sequence; item 0 is the address, item 1 the display name
    :param attached_file: str, path of the original message file (optional)
    :param date_time: str, discovery time; defaults to the current local time
    :param subject: str, subject of the original letter (optional)
    :return: None
    """
    msg_mime = MIMEMultipart('alternative')
    msg_mime['From'] = SMTP_SENDER
    msg_mime['To'] = recipient[1] + ' <' + recipient[0] + '>'
    if subject:
        # English: 'An important letter was found: [ <subject> ]'
        msg_mime['Subject'] = 'Обнаружено важное письмо: [ ' + str(
            subject).strip() + ' ]'
    else:
        # English: 'An important letter was found!'
        msg_mime['Subject'] = 'Обнаружено важное письмо!'

    body_text = '%BODY%'
    body_html = """\
    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
    </head>
    <html>
      <body marginwidth="0" \
      marginheight="0" leftmargin="0" topmargin="0" style="background-color:#F6F6F6; \
      font-family:Arial,serif; margin:0; padding:0; min-width: 100%; \
      -webkit-text-size-adjust:none; -ms-text-size-adjust:none;">
        <div style="width: auto; color:#000; background-color: #F4F5F7; \
        padding: 50px; display: inline-block;">
        %BODY%
        </div>
      </body>
    </html>
    """

    if date_time:
        today = date_time
    else:
        today = '{:%d.%m.%Y %H:%M:%S}'.format(datetime.now())
    # English: 'Date and time of discovery: '
    today = 'Дата и время обнаружения: ' + today

    # Separator line of eight dashes.
    hl = '{:-<8}'.format('')

    # Text of the original message with HTML tags removed.
    eml_text = ''
    if attached_file and Path(attached_file).is_file():
        # The file is only read, so plain 'rb' is enough ('rb+' would also
        # demand write permission).
        with open(attached_file, 'rb') as file:
            eml_msg = BytesParser(policy=policy.default).parse(file)
        eml_text_b = eml_msg.get_body()
        if eml_text_b:
            eml_text = eml_text_b.get_content()
            if eml_text:
                eml_text = re.sub(r'<br.*?>', '\n', eml_text)
                eml_text = re.sub(r'<.*?>', '', eml_text)
                eml_text = str(eml_text).strip()

    # Plain-text part
    b_txt = ''
    # English: 'An important letter was found!'
    b_txt += '\nОбнаружено важное письмо!\n'
    b_txt += today + '\n'
    b_txt += hl + '\n'
    b_txt += message.rstrip('\n') + '\n'
    b_txt += hl + '\n'
    # English: 'The text of the original message:'
    b_txt += 'Текст оригинального сообщения:\n'
    b_txt += hl + '\n'
    b_txt += eml_text + '\n'
    body_text = body_text.replace('%BODY%', b_txt)

    # HTML part
    b_html = ''
    # English: 'An important letter was found!'
    b_html += '<br><b>Обнаружено важное письмо!</b><br>'
    b_html += '<pre style="white-space: pre-wrap; word-wrap: break-word;">'
    b_html += today + '\n'
    b_html += hl + '\n'
    b_html += message.rstrip('\n') + '\n'
    b_html += hl + '\n'
    # English: 'The text of the original message:'
    b_html += 'Текст оригинального сообщения:\n'
    b_html += hl + '\n'
    b_html += eml_text + '\n'
    b_html += '</pre>'
    body_html = body_html.replace('%BODY%', b_html)

    part_plain = MIMEText(body_text, 'plain')
    part_html = MIMEText(body_html, 'html')

    msg_mime.attach(part_plain)
    msg_mime.attach(part_html)

    if attached_file and Path(attached_file).is_file():
        # The archive lives next to the original file: same name, .zip suffix.
        eml_zipfile = Path(attached_file).with_suffix('.zip')

        # Reuse an archive left over from an earlier call.
        if not Path(eml_zipfile).is_file():
            with zipfile.ZipFile(eml_zipfile, 'w',
                                 zipfile.ZIP_DEFLATED) as zipf:
                zipf.write(attached_file, basename(attached_file))

        with open(eml_zipfile, 'rb') as file:
            eml_part = MIMEApplication(file.read(),
                                       Name=basename(eml_zipfile))

        if Path(eml_zipfile).is_file():
            # English: 'Email is sent      : ...' / 'The attachment     : ...'
            print(
                f'\nОтправляется письмо: {FLG}{recipient[0]} ({recipient[1]}){FR}\n'
                f'Вложение           : {FLG}{basename(eml_zipfile)} ({basename(attached_file)})'
            )
            print('{:-<80}'.format(''))
            eml_part[
                'Content-Disposition'] = 'attachment; filename="%s"' % basename(
                    eml_zipfile)
            msg_mime.attach(eml_part)

    summary_message = msg_mime.as_string()

    server = None
    context = None
    # Try to connect to the SMTP server to send the email
    try:
        if SMTP_SSL or SMTP_STARTTLS:
            # Both SSL and STARTTLS need a secure SSL context
            context = ssl.create_default_context()

        if SMTP_SSL:
            # Implicit TLS from the first byte
            server = smtplib.SMTP_SSL(host=SMTP_SERVER,
                                      port=SMTP_PORT,
                                      context=context)
        else:
            # Plain connection. STARTTLS also starts from one, so it must be
            # opened whenever SSL is off, not only when STARTTLS is off too.
            server = smtplib.SMTP(SMTP_SERVER, SMTP_PORT)
            server.ehlo()

        if SMTP_STARTTLS:
            # Upgrade the connection to TLS
            server.starttls(context=context)
            server.ehlo()

        server.login(SMTP_SENDER, SMTP_PASSWORD)
        server.sendmail(SMTP_SENDER, recipient[0], summary_message)
    except Exception as e:
        # Print any error messages to stdout
        print(f'{FLR}{e}')
    finally:
        # server is still None when the connection could not be created.
        if server is not None:
            server.quit()

コード例 #27

0

ファイルを表示

ファイル: viewsets.py プロジェクト: KHIT93/mailguardian

 def get_message_contents(self, request, pk=None):
     """Return a preview of the message identified by ``pk``.

     When the message belongs to another MailScanner host, the request is
     forwarded to that host's API with the current user's token and its JSON
     answer is returned. Otherwise the local queue file is parsed and the
     simplest (plain preferred) and richest body variants are reported.

     Responds with 404 when the remote host answers 404 or the local queue
     file does not exist.
     """
     message = get_object_or_404(self.get_queryset(), pk=pk)
     data = {
         'message_id': message.id,
         'mailq_id': message.mailq_id,
         'message_contents': None
     }
     if message.mailscanner_hostname != settings.APP_HOSTNAME:
         token = Token.objects.get(user=request.user)
         host = MailScannerHost.objects.get(
             hostname=message.mailscanner_hostname)
         protocol = 'https' if host.use_tls else 'http'
         url = '{0}://{1}/api/messages/{2}/contents/'.format(
             protocol, host.hostname, pk)
         headers = {
             'Content-Type': 'application/json',
             'Authorization': 'Token {0}'.format(token.key)
         }
         result = requests.get(url, headers=headers)
         print(result)
         if result.status_code == 404:
             return Response({}, status.HTTP_404_NOT_FOUND)
         data = result.json()
     else:
         if not message.queue_file_exists():
             return Response({}, status.HTTP_404_NOT_FOUND)
         m = None
         data = {
             'message': {
                 'message_id': message.id,
                 'mailq_id': message.mailq_id
             }
         }
         with open(message.file_path(), 'rb') as fp:
             m = BytesParser(policy=policy.default).parse(fp)
         # NOTE(review): get_body() returns None when no candidate part
         # exists; the lookups below would then fail - confirm whether such
         # messages can reach this view.
         simplest = m.get_body(preferencelist=('plain', 'html'))
         richest = m.get_body()
         data['message']['simple_type'] = "{0}/{1}".format(
             simplest['content-type'].maintype,
             simplest['content-type'].subtype)
         data['message']['rich_type'] = "{0}/{1}".format(
             richest['content-type'].maintype,
             richest['content-type'].subtype)
         if simplest['content-type'].subtype == 'html':
             data['message']['simple_version'] = ''
         else:
             data['message']['simple_version'] = simplest
         if richest['content-type'].subtype == 'html':
             data['message']['rich_version'] = richest
         elif richest['content-type'].content_type == 'multipart/related':
             # Replace the literal script tags with their escaped form.
             # '&lt;' is '<' and '&gt;' is '>'. This only catches the exact
             # strings '<script>' and '</script>'; it is not a sanitiser.
             data['message']['rich_version'] = richest.get_body(
                 preferencelist=('html',)).get_content().replace(
                     '<script>',
                     '&lt;script&gt;').replace('</script>',
                                               '&lt;/script&gt;')
             data['message']['attachments'] = []
             for part in richest.iter_attachments():
                 data['message']['attachments'].append(part.get_filename())
         else:
             data['message']['rich_version'] = _('Preview unavailable')
     return Response(data)

コード例 #28

0

ファイルを表示

ファイル: run.py プロジェクト: ByamB4/Abuse-Mailbox-Dashboard

def readmails():
    """Parse every stored mail and write a ``processed-data.json`` summary.

    Mail directories are looked up as ``MAIL_/1`` .. ``MAIL_/N``, N being the
    number of entries in ``MAIL_``. Directories that already contain
    ``processed-data.json`` are skipped. For senders listed in ``KNOWN`` the
    reported IP address(es) and, where recognisable, a category are taken
    from the subject or the plain-text body.

    An error while handling one mail is logged/printed and does not stop the
    run.
    """
    import datetime
    import re

    import eml_parser
    from email import policy
    from email.parser import BytesParser

    def json_serial(obj):
        """Serialise datetimes as ISO strings (anything else becomes null)."""
        if isinstance(obj, datetime.datetime):
            serial = obj.isoformat()
            return serial

    KNOWN = [
        '*****@*****.**', '*****@*****.**', '*****@*****.**',
        '*****@*****.**', '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**', '*****@*****.**',
        '*****@*****.**', '*****@*****.**',
        '*****@*****.**', '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**', '*****@*****.**',
        '*****@*****.**'
    ]
    # Dotted-quad matcher shared by every branch below.
    find_ips = re.compile(r'[0-9]+(?:\.[0-9]+){3}').findall

    ep = eml_parser.eml_parser
    end = len(os.listdir(MAIL_))
    for mail_no in range(1, end + 1):
        mail_dir = MAIL_ + '/' + str(mail_no)
        if os.path.exists(mail_dir + '/processed-data.json'):
            continue
        try:
            if LOG: log.write(f'[+] Reading: {mail_no}\n')
            if CNS: print(f'[+] Reading: {mail_no}')
            with open(mail_dir + '/main-content.elm', 'rb') as f:
                raw_email = f.read()
            parsed_eml = ep.decode_email_b(raw_email)
            parsed_json = json.loads(
                json.dumps(parsed_eml, default=json_serial))
            FROM = parsed_json['header']['from']
            SUBJECT = parsed_json['header']['subject']
            DATE = parsed_json['header']['date']
            CATEGORY, OUR_ADDRESS, VICTIM_ADDRESS = '', '', ''
            if FROM in KNOWN:
                with open(mail_dir + '/main-content.elm', 'rb') as f:
                    msg = BytesParser(policy=policy.default).parse(f)
                # Default to an empty body so that branches which only need
                # SUBJECT still work when no plain-text part can be read.
                TEXT = ''
                try:
                    TEXT = msg.get_body(
                        preferencelist=('plain',)).get_content()
                except Exception:
                    pass
                if FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    if 'phishing' in TEXT: CATEGORY = 'phishing'
                    OUR_ADDRESS = find_ips(TEXT)[1]
                    VICTIM_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
                    if 'botnet' in TEXT: CATEGORY = 'botnet'
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
                    if 'Netscan' in SUBJECT: CATEGORY = 'scan'
                # NOTE(review): this tests whether FROM is a substring of the
                # domain; possibly meant 'p2p.markmonitor.com' in FROM -
                # confirm before changing.
                elif FROM in 'p2p.markmonitor.com':
                    OUR_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
                    print(OUR_ADDRESS)
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
                    if 'ssh' in SUBJECT: CATEGORY = 'ssh'
                elif FROM == '*****@*****.**':
                    if 'DOS' in TEXT: CATEGORY = 'DOS'
                    OUR_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    if 'DOS' in TEXT: CATEGORY = 'DOS'
                    OUR_ADDRESS = find_ips(TEXT)[0]
                elif FROM == '*****@*****.**':
                    CATEGORY = 'wp-admin'
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
                elif FROM == '*****@*****.**':
                    OUR_ADDRESS = find_ips(TEXT)[0]
                    if 'scan' in TEXT: CATEGORY = 'scan'
                elif FROM == '*****@*****.**':
                    CATEGORY = 'scan'
                    OUR_ADDRESS = find_ips(SUBJECT)[0]
            else:
                if LOG: log.write('[-] Unknown mail\n')
                if CNS: print('[-] Unknown mail')
            # NOTE(review): the summary file is only written when CNS is set -
            # confirm that this is intended.
            if CNS:
                print('-' * 20)
                outdata = {}
                outdata['FROM'] = FROM
                outdata['SUBJECT'] = SUBJECT
                outdata['DATE'] = DATE
                if CATEGORY != '':
                    outdata['CATEGORY'] = CATEGORY
                if OUR_ADDRESS != '':
                    outdata['OUR_ADDRESS'] = OUR_ADDRESS
                if VICTIM_ADDRESS != '':
                    outdata['VICTIM_ADDRESS'] = VICTIM_ADDRESS
                print(outdata)
                # Same path as the "already processed" check above, and the
                # handle is closed once the data is written.
                with open(mail_dir + '/processed-data.json', 'w') as out:
                    out.write(json.dumps(outdata))
                print('-' * 20)
        except Exception as e:
            if LOG: log.write(f'[-] Error: {e}\n')
            if CNS: print(f'[-] Error: {mail_no}')
    if CNS: print('[+] Reading all mail successfully')
    if LOG: log.write('[+] Reading all mail successfully\n')

コード例 #29

0

ファイルを表示

ファイル: database.py プロジェクト: KajaDuff/flask_app_test

def display_eml(eml_filepath):
    """Parse an .eml file and return its headers, plain body and attachments.

    Args:
        eml_filepath: Path of the .eml file to read.

    Returns:
        A dict with the keys "Odesílatel" (From), "Příjemce" (To), "Datum"
        (Date), "Předmět" (Subject), "Text zprávy" (plain-text body, or
        ``None`` when the message has none) and "Přílohy": a list of
        ``(parameters, part)`` tuples, one for each part whose
        Content-Disposition is ``attachment``.
    """
    # TODO: the original author marked this function as needing fine-tuning.
    with open(eml_filepath, 'rb') as eml_file:
        msg = BytesParser(policy=policy.default).parse(eml_file)

    # get_body() expects a sequence of subtypes and returns None when the
    # message has no matching part.
    body = msg.get_body(preferencelist=('plain',))
    text = body.get_content() if body is not None else None

    found = []
    for part in msg.walk():
        if 'content-disposition' not in part:
            continue
        cdisp = [x.strip() for x in part['content-disposition'].split(';')]
        if cdisp[0].lower() != 'attachment':
            continue
        parsed = {}
        for kv in cdisp[1:]:
            if not kv:
                # Produced by a trailing ';'.
                continue
            # partition() keeps any '=' inside the value and tolerates
            # parameters that have no value at all.
            key, _, val = kv.partition('=')
            if val.startswith('"'):
                val = val.strip('"')
            elif val.startswith("'"):
                val = val.strip("'")
            parsed[key] = val
        found.append((parsed, part))

    return {
        "Odesílatel": msg.get('From'),
        "Příjemce": msg.get('To'),
        "Datum": msg.get('Date'),
        "Předmět": msg.get('Subject'),
        "Text zprávy": text,
        "Přílohy": found,
    }

コード例 #30

0

ファイルを表示

class EmlParser():
    """Accessors for the headers, bodies and attachments of one .eml file."""

    def __init__(self, fileName):
        """Parse the bytes returned by ``readFile(fileName)``."""
        self.message = BytesParser(policy=policy.default).parsebytes(
            readFile(fileName))

    def getId(self):
        """Return ``getHashOfItem`` of the parsed message."""
        return getHashOfItem(self.message)

    def _iterAttachmentFilenames(self):
        """Yield ``(filename, part)`` for every filename of an attachment part."""
        for part in self.message.walk():
            if 'content-disposition' not in part:
                continue

            cdisp = [x.strip() for x in part['content-disposition'].split(';')]
            if cdisp[0].lower() != 'attachment':
                continue

            for kv in cdisp[1:]:
                if not kv.startswith('filename='):
                    continue
                val = kv.partition('=')[2]

                # Drop the quotes that surround the value, if any.
                if val.startswith('"'):
                    val = val.strip('"')
                elif val.startswith("'"):
                    val = val.strip("'")

                yield val, part

    def getAttachmentData(self, name):
        """Return the decoded payload of the attachment called ``name``.

        Returns ``None`` when no attachment carries that file name.
        """
        for val, part in self._iterAttachmentFilenames():
            if name == val:
                return part.get_payload(decode=True)

        return None

    def getAttachmentNames(self):
        """Return the file names of all attachments, in message order."""
        return [val for val, _ in self._iterAttachmentFilenames()]

    def getPayloadHtml(self):
        """Return the HTML body as text, or ``''`` when there is none."""
        # get_body() expects a sequence of subtypes, not a bare string.
        body = self.message.get_body(('html',))

        if (body):
            return self._decode_body(body.get_payload(decode=True))

        return ''

    def getPayloadPlain(self):
        """Return the plain-text body as text, or ``''`` when there is none."""
        body = self.message.get_body(('plain',))

        if (body):
            return self._decode_body(body.get_payload(decode=True))

        return ''

    def getSender(self):
        """Return ``extractEmails`` applied to the From header."""
        return extractEmails(str(self.message['from']))

    def getReceivers(self):
        """Return ``extractEmails`` applied to the To header."""
        return extractEmails(str(self.message['to']))

    def getSubject(self):
        """Return the decoded Subject header (``''`` when missing)."""
        return self._decode_entry(self.message['Subject'])

    def getDate(self):
        """Return the Date header formatted as ``"<date> <time>"``."""
        dt = parse(self.message['Date'])
        return str(dt.date()) + " " + str(dt.time())

    def _decode_entry(self, entry):
        """Decode RFC 2047 encoded words in a header value.

        ``None`` is turned into an empty string.
        """
        if entry is None:
            return ''

        result = ''
        for chunk, encoding in decode_header(entry):
            if isinstance(chunk, str):
                result += chunk
            else:
                # Unencoded segments come back as bytes with charset None;
                # decode_header() produced them with raw-unicode-escape, so
                # undo exactly that instead of calling decode(None).
                result += chunk.decode(encoding or 'raw-unicode-escape')

        return result

    def _decode_body(self, entry):
        """Decode body bytes as UTF-8, falling back to Latin-1."""
        try:
            entry = entry.decode('utf-8')
        except UnicodeDecodeError:
            entry = entry.decode('latin-1')

        return entry

コード例 #31

-1

ファイルを表示

ファイル: utils.py プロジェクト: toulibre/PonyConf

def process_email(raw_email):
    """Store an incoming reply mail as a ``Message`` in its thread.

    The reply token is read from a To address of the form
    ``<local>+<80 alphanumeric characters>@<domain>`` and resolved with
    ``process_new_token``, falling back to ``process_old_token`` when the
    former raises ``InvalidTokenException``.

    Args:
        raw_email: The raw message as bytes.

    Raises:
        NoTokenFoundException: if no To address carries a token.
    """
    msg = BytesParser(policy=policy.default).parsebytes(raw_email)
    body = msg.get_body(preferencelist=['plain'])
    content = body.get_payload(decode=True)

    charset = body.get_content_charset()
    if not charset:
        charset = chardet.detect(content)['encoding']
    # The detected encoding can be None as well; fall back to UTF-8 rather
    # than passing None to decode().
    content = content.decode(charset or 'utf-8')

    # Raw string: '\+' is not a valid escape in a normal string literal.
    regex = re.compile(r'^[^+@]+\+(?P<token>[a-zA-Z0-9]{80})@[^@]+$')

    # ''.split(',') still yields one item, so m is always bound after the loop.
    for addr in msg.get('To', '').split(','):
        m = regex.match(addr.strip())
        if m:
            break

    if not m:
        raise NoTokenFoundException

    token = m.group('token')

    try:
        in_reply_to, author = process_new_token(token)
    except InvalidTokenException:
        in_reply_to, author = process_old_token(token)

    subject = msg.get('Subject', '')

    Message.objects.create(
        thread=in_reply_to.thread,
        in_reply_to=in_reply_to,
        author=author,
        subject=subject,
        content=content,
    )