Esempio n. 1
0
 def __init__(self, directory, filename, extension, limit_megabytes=10):
     """Store the path components and the size limit converted to bytes.

     directory, filename and extension are kept unchanged on the
     instance.  limit_megabytes (default 10) is multiplied by 1024 * 1024
     and stored as ``limit_bytes``.
     """
     self.directory, self.filename, self.extension = (
         directory, filename, extension)
     # counter is a helper defined elsewhere in the project; seeded with 0.
     self.counter = counter(0)
     # Starts out as the empty string.
     self.current_file = ''
     self.limit_bytes = limit_megabytes * 1024 * 1024
Esempio n. 2
0
 def __init__(self, directory, filename, extension, limit_megabytes=10):
     """Store the path components and the size limit converted to bytes.

     directory, filename and extension are kept unchanged on the
     instance.  limit_megabytes (default 10) is multiplied by 1024 * 1024
     and stored as ``limit_bytes``.
     """
     self.directory, self.filename, self.extension = (
         directory, filename, extension)
     # counter is a helper defined elsewhere in the project; seeded with 0.
     self.counter = counter(0)
     # Starts out as the empty string.
     self.current_file = ''
     self.limit_bytes = limit_megabytes * 1024 * 1024
Esempio n. 3
0
def extract(email_id, buff_mail, out_dir, categories, target_email):
    """Unpack one raw email under ``<out_dir>/emails/<email_id>``.

    Files written into that directory:
      * ``<email_id>.eml`` -- the raw message exactly as passed in;
      * one file per message part that carries a Content-Disposition
        header (a text/plain part with that header is both added to the
        text body and saved as a file);
      * ``<email_id>.txt`` -- the concatenated text/plain payloads after
        clean_string has been applied.

    Args:
        email_id: identifier used for the directory and the file names.
        buff_mail: raw message text, parsed with email.message_from_string.
        out_dir: root output directory.
        categories: passed through to createRow unchanged.
        target_email: passed through to createRow unchanged.

    Returns:
        Whatever createRow returns for this message.
    """
    _dir = "{}/emails/{}".format(out_dir, email_id)
    mkdirp(_dir)
    # write raw email to new dir
    spit("{}/{}.eml".format(_dir, email_id), buff_mail)
    mail = email.message_from_string(buff_mail)
    attach = []
    msg = ""
    attach_count = counter()

    for part in mail.walk():
        if part.get_content_type() == 'text/plain':
            msg = msg + "\n" + part.get_payload()
        if part.get_content_type() == 'message/delivery-status':
            continue
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue

        # get_payload(decode=True) yields None for container parts (for
        # example an attached message/rfc822); there are no bytes to save
        # and writing None would raise, so skip those parts.
        payload = part.get_payload(decode=True)
        if payload is None:
            continue

        # Unnamed attachments get a generated name.  The builtin next() is
        # used rather than the Python-2-only .next() method.
        fileName = part.get_filename() or "Attach_{}".format(
            next(attach_count))

        if fileName == 'rtf-body.rtf':
            continue

        # NOTE(review): the EXPR_OPTS entries presumably normalise encoding
        # and strip path separators -- confirm against clean_string.
        fileName = clean_string(fileName, [
            EXPR_OPTS['fix_utf8'],
            EXPR_OPTS['fix_forwardslash'],
            (r' ', '_'),
            (r'&', '_')])

        attach.append(fileName)
        filePath = "{}/{}".format(_dir, fileName)
        # save attachment; the with-block closes the handle even when the
        # write fails
        with open(filePath, 'wb') as fp:
            fp.write(payload)

    msg = clean_string(msg, [EXPR_OPTS['fix_utf8']])
    spit("{}/{}.txt".format(_dir, email_id), msg)
    return createRow(email_id, "emails/{}".format(email_id), target_email,
                     mail, categories, attach, msg)
Esempio n. 4
0
def extract(email_id, buff_mail, out_dir, categories, target_email):
    """Unpack one raw email under ``<out_dir>/emails/<email_id>``.

    Files written into that directory:
      * ``<email_id>.eml`` -- the raw message exactly as passed in;
      * one file per message part that carries a Content-Disposition
        header (a text/plain part with that header is both added to the
        text body and saved as a file);
      * ``<email_id>.txt`` -- the concatenated text/plain payloads after
        clean_string has been applied.

    Args:
        email_id: identifier used for the directory and the file names.
        buff_mail: raw message text, parsed with email.message_from_string.
        out_dir: root output directory.
        categories: passed through to createRow unchanged.
        target_email: passed through to createRow unchanged.

    Returns:
        Whatever createRow returns for this message.
    """
    _dir = "{}/emails/{}".format(out_dir, email_id)
    mkdirp(_dir)
    # write raw email to new dir
    spit("{}/{}.eml".format(_dir, email_id), buff_mail)
    mail = email.message_from_string(buff_mail)
    attach = []
    msg = ""
    attach_count = counter()

    for part in mail.walk():
        if part.get_content_type() == 'text/plain':
            msg = msg + "\n" + part.get_payload()
        if part.get_content_type() == 'message/delivery-status':
            continue
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue

        # get_payload(decode=True) yields None for container parts (for
        # example an attached message/rfc822); there are no bytes to save
        # and writing None would raise, so skip those parts.
        payload = part.get_payload(decode=True)
        if payload is None:
            continue

        # Unnamed attachments get a generated name.  The builtin next() is
        # used rather than the Python-2-only .next() method.
        fileName = part.get_filename() or "Attach_{}".format(
            next(attach_count))

        if fileName == 'rtf-body.rtf':
            continue

        # NOTE(review): the EXPR_OPTS entries presumably normalise encoding
        # and strip path separators -- confirm against clean_string.
        fileName = clean_string(fileName, [
            EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_forwardslash'], (r' ', '_'),
            (r'&', '_')
        ])

        attach.append(fileName)
        filePath = "{}/{}".format(_dir, fileName)
        # save attachment; the with-block closes the handle even when the
        # write fails
        with open(filePath, 'wb') as fp:
            fp.write(payload)

    msg = clean_string(msg, [EXPR_OPTS['fix_utf8']])
    spit("{}/{}.txt".format(_dir, email_id), msg)
    return createRow(email_id, "emails/{}".format(email_id), target_email,
                     mail, categories, attach, msg)
Esempio n. 5
0
        " where t1.schema_name = 'email' "
        " and t1.predicate = 'from' "
        " and t2.predicate in ('to', 'cc', 'bcc') "
        " group by t1.obj, t2.obj "
        " ) as bi_dir "
        " GROUP BY source, target "
        " ) as lvn "
        " group by source, target ")

    nodes = []
    node_map = {}
    edges = []

    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt) as qry:
            c = counter()
            for row in qry.cursor():
                src, target, weight = row

                if src not in node_map:
                    node_map[src] = c.next()
                    nodes.append({
                        'name': src,
                        'community': 'n/a',
                        'idx': node_map[src]
                    })

                if target not in node_map:
                    node_map[target] = c.next()
                    nodes.append({
                        'name': target,
Esempio n. 6
0
def download(srv, target_email, outdir, limit, logfile):
    """Download the messages in "[Gmail]/All Mail" and unpack them.

    For every message id returned by the search, the raw message is saved
    as ``<outdir>/emails/<uid>/<uid>.eml``, each part carrying a
    Content-Disposition header is saved as a file in the same directory,
    the text/plain payloads (non-ASCII characters replaced by spaces) are
    written to ``<uid>.txt``, and the row built by createRow is passed to
    spit for ``<outdir>/output.csv``.

    Args:
        srv: mail connection offering select/search/fetch -- presumably an
            imaplib.IMAP4 instance; confirm against the caller.
        target_email: passed through to createRow unchanged.
        outdir: root output directory.
        limit: when greater than 0, only the last *limit* message ids
            returned by the search are processed.
        logfile: path handed to spit for progress and error lines.

    Raises:
        Exception: if the initial search does not answer 'OK'.  Failures on
            an individual message (including a non-'OK' fetch) are logged
            to *logfile* and the loop moves on to the next message.
    """
    srv.select("[Gmail]/All Mail", True)
    resp, data = srv.search(None, 'ALL')

    if resp != 'OK':
        err_msg = "Error searching: %s %s" % (resp, data)
        spit(logfile, "[Error] {}\n".format(err_msg))
        raise Exception(err_msg)

    msgids = data[0].split()

    if limit > 0:
        msgids = msgids[-limit:]

    attach_count = counter()
    c = counter()
    total = len(msgids)
    for msgid in msgids:
        # Advance the position counter before anything can fail, so the
        # except-handler below always has this message's index to report
        # (previously it could be unbound or left over from an earlier
        # message).
        i = next(c)
        try:
            uid = getUIDForMessage(srv, msgid)
            fldr = "emails/{}".format(uid)
            mkdir("{}/{}".format(outdir, fldr))

            if i % 200 == 0:
                spit(logfile,
                     "[Downloading] Downloaded: {}/{}\n".format(i, total))

            resp, msgParts = srv.fetch(msgid, '(RFC822)')
            if resp != 'OK':
                err_msg = "Bad response: %s %s" % (resp, msgParts)
                spit(logfile, "[Error] {}\n".format(err_msg))
                raise Exception(err_msg)

            emailBody = msgParts[0][1]
            spit("{}/{}/{}.eml".format(outdir, fldr, uid), emailBody)
            mail = email.message_from_string(emailBody)
            attach = []
            msg = ""
            for part in mail.walk():
                if part.get_content_type() == 'text/plain':
                    msg = msg + "\n" + part.get_payload()
                if part.get_content_maintype() == 'multipart':
                    continue
                if part.get('Content-Disposition') is None:
                    continue

                # get_payload(decode=True) yields None for container parts
                # (for example an attached message/rfc822); skip them so
                # one such part does not discard the whole message.
                payload = part.get_payload(decode=True)
                if payload is None:
                    continue

                # Unnamed attachments get a generated name; '/' is replaced
                # so the name cannot introduce extra path components.
                fileName = part.get_filename() or "Attach_{}".format(
                    next(attach_count))
                fileName = fileName.replace('/', '_')
                attach.append(fileName)
                filePath = "{}/{}/{}".format(outdir, fldr, fileName)

                # the with-block closes the handle even when write fails
                with open(filePath, 'wb') as fp:
                    fp.write(payload)

            msg = re.sub(r'[^\x00-\x7F]', ' ', msg)
            spit("{}/{}/{}.txt".format(outdir, fldr, uid), msg)
            row = createRow(uid, fldr, target_email, mail, attach, msg)
            spit("{}/output.csv".format(outdir), row + "\n")
        except Exception as e:
            # Best effort: record the failure and carry on with the next id.
            spit(logfile, "[Downloading] [Exception]: line {}, msgid {}, except {}\n".format(i, msgid, str(e)))
Esempio n. 7
0
def download(srv, target_email, outdir, limit, logfile):
    """Download the messages in "[Gmail]/All Mail" and unpack them.

    For every message id returned by the search, the raw message is saved
    as ``<outdir>/emails/<uid>/<uid>.eml``, each part carrying a
    Content-Disposition header is saved as a file in the same directory,
    the text/plain payloads (non-ASCII characters replaced by spaces) are
    written to ``<uid>.txt``, and the row built by createRow is passed to
    spit for ``<outdir>/output.csv``.

    Args:
        srv: mail connection offering select/search/fetch -- presumably an
            imaplib.IMAP4 instance; confirm against the caller.
        target_email: passed through to createRow unchanged.
        outdir: root output directory.
        limit: when greater than 0, only the last *limit* message ids
            returned by the search are processed.
        logfile: path handed to spit for progress and error lines.

    Raises:
        Exception: if the initial search does not answer 'OK'.  Failures on
            an individual message (including a non-'OK' fetch) are logged
            to *logfile* and the loop moves on to the next message.
    """
    srv.select("[Gmail]/All Mail", True)
    resp, data = srv.search(None, 'ALL')

    if resp != 'OK':
        err_msg = "Error searching: %s %s" % (resp, data)
        spit(logfile, "[Error] {}\n".format(err_msg))
        raise Exception(err_msg)

    msgids = data[0].split()

    if limit > 0:
        msgids = msgids[-limit:]

    attach_count = counter()
    c = counter()
    total = len(msgids)
    for msgid in msgids:
        # Advance the position counter before anything can fail, so the
        # except-handler below always has this message's index to report
        # (previously it could be unbound or left over from an earlier
        # message).
        i = next(c)
        try:
            uid = getUIDForMessage(srv, msgid)
            fldr = "emails/{}".format(uid)
            mkdir("{}/{}".format(outdir, fldr))

            if i % 200 == 0:
                spit(logfile,
                     "[Downloading] Downloaded: {}/{}\n".format(i, total))

            resp, msgParts = srv.fetch(msgid, '(RFC822)')
            if resp != 'OK':
                err_msg = "Bad response: %s %s" % (resp, msgParts)
                spit(logfile, "[Error] {}\n".format(err_msg))
                raise Exception(err_msg)

            emailBody = msgParts[0][1]
            spit("{}/{}/{}.eml".format(outdir, fldr, uid), emailBody)
            mail = email.message_from_string(emailBody)
            attach = []
            msg = ""
            for part in mail.walk():
                if part.get_content_type() == 'text/plain':
                    msg = msg + "\n" + part.get_payload()
                if part.get_content_maintype() == 'multipart':
                    continue
                if part.get('Content-Disposition') is None:
                    continue

                # get_payload(decode=True) yields None for container parts
                # (for example an attached message/rfc822); skip them so
                # one such part does not discard the whole message.
                payload = part.get_payload(decode=True)
                if payload is None:
                    continue

                # Unnamed attachments get a generated name; '/' is replaced
                # so the name cannot introduce extra path components.
                fileName = part.get_filename() or "Attach_{}".format(
                    next(attach_count))
                fileName = fileName.replace('/', '_')
                attach.append(fileName)
                filePath = "{}/{}/{}".format(outdir, fldr, fileName)

                # the with-block closes the handle even when write fails
                with open(filePath, 'wb') as fp:
                    fp.write(payload)

            msg = re.sub(r'[^\x00-\x7F]', ' ', msg)
            spit("{}/{}/{}.txt".format(outdir, fldr, uid), msg)
            row = createRow(uid, fldr, target_email, mail, attach, msg)
            spit("{}/output.csv".format(outdir), row + "\n")
        except Exception as e:
            # Best effort: record the failure and carry on with the next id.
            spit(
                logfile,
                "[Downloading] [Exception]: line {}, msgid {}, except {}\n".
                format(i, msgid, str(e)))
Esempio n. 8
0
        " and t1.predicate = 'from' "
        " and t2.predicate in ('to', 'cc', 'bcc') "
        " group by t1.obj, t2.obj "
        " ) as bi_dir "
        " GROUP BY source, target "
        " ) as lvn "
        " group by source, target "
    )

    nodes = []
    node_map = {}
    edges = []

    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt) as qry:
            c = counter()    
            for row in qry.cursor():
                src, target, weight = row

                if src not in node_map:
                    node_map[src] = c.next()
                    nodes.append({'name': src, 
                                  'community': 'n/a', 
                                  'idx': node_map[src] })

                if target not in node_map:
                    node_map[target] = c.next()
                    nodes.append({'name': target, 
                                  'community': 'n/a', 
                                  'idx': node_map[target] })