Example #1
0
    def extract_thread():
        """Run ./bin/ingest.sh in a subprocess and report progress to the status log.

        The subprocess runs with ``cwd=base_dir`` and has both stdout and
        stderr redirected into ``ingester_log``. A non-zero return code is
        written to ``service_status_log`` as an error; on success the
        email_addr cache is reloaded via ``initialize_email_addr_cache``.
        Any exception is caught and written to ``service_status_log``.
        """
        try:
            cmd = ["./bin/ingest.sh", ingest_id, ingest_parent_dir, ingest_file, type]
            cmd_line = " ".join(cmd)

            cherrypy.log("running pst: {}".format(cmd_line))
            spit(service_status_log, "[Running] {} \n".format(cmd_line))

            with open(ingester_log, 'w') as t:
                popen_kwargs = {'stdout': t, 'stderr': t, 'cwd': base_dir, 'bufsize': 1}
                subp = subprocess.Popen(cmd, **popen_kwargs)
                # Output goes straight to the log file, so communicate() is
                # used only to wait for the process to exit.
                subp.communicate()

                # TODO should never see this line  - remove this
                cherrypy.log("complete: {}".format(fmtNow()))

                rtn = subp.returncode
                if rtn != 0:
                    spit(service_status_log, "[Error] return with non-zero code: {} \n".format(rtn))
                else:
                    spit(service_status_log, "[Done Ingesting data.  Reloading the email_addr cache.]")
                    initialize_email_addr_cache(ingest_id, update=True)
                    spit(service_status_log, "[Complete.]")
        except:
            # Deliberate catch-all at the thread boundary: whatever went wrong
            # must end up in the status log. sys.exc_info()[0] is the exception
            # *class*, which has no .replace(), so build a string first.
            exc_type, exc_value = sys.exc_info()[:2]
            error_info = "{}: {}".format(exc_type.__name__, exc_value)
            spit(service_status_log, "[Error] {}\n".format(error_info.replace('\n', ' ')))
Example #2
0
def download(data):
    """Start a background download of a user's mail and return immediately.

    Reads ``user`` (required), ``pass`` and ``limit`` (default "2000") from
    ``data``. A worker thread logs in through ``newman_email.login``,
    recreates ``<webroot>/emails/<user>``, writes the CSV header row and
    calls ``newman_email.download``. Progress and errors are written to
    ``<work_dir>/<user>.log``.

    Returns:
        ``{"id": user}``, or a ``tangelo.HTTPStatusCode(400, ...)`` when
        ``user`` is missing or empty.
    """
    user = data.get("user")

    if not user:
        return tangelo.HTTPStatusCode(400, "invalid service call missing user")

    passwd = data.get("pass")
    limit = data.get("limit", "2000")
    logfile = "{}/{}.log".format(work_dir, user)
    spit(logfile, "[Start] {}\n".format(user), True)
    cherrypy.log("logfile: {}".format(logfile))

    def current_error_text():
        # sys.exc_info()[0] is the exception class (no .replace()); build a
        # single-line "Type: message" string from the type and the value.
        exc_type, exc_value = sys.exc_info()[:2]
        text = "{}: {}".format(exc_type.__name__, exc_value)
        return text.replace('\n', ' ')

    def download_thread():
        try:
            cherrypy.log("Thread Start User: {}".format(user))

            # Bound up front so the finally clause can tell whether login
            # ever produced a session to close.
            session = None
            try:
                session = newman_email.login(user, passwd, logfile)
                fldr = "{}/emails/{}".format(webroot, user)
                cherrypy.log("Login User: {}".format(user))

                if os.path.exists(fldr):
                    rmrf(fldr)

                mkdir(fldr)

                spit("{}/output.csv".format(fldr),
                     newman_email.headerrow() + "\n")

                mkdir(fldr + "/emails")

                newman_email.download(session, user, fldr, int(limit), logfile)

                spit(logfile, "[Completed Download] {}\n".format(user))
            except Exception as ex:
                spit(logfile, "[Error] {}\n".format(ex))
                cherrypy.log("[Error] {}\n".format(ex))
            except:
                # Anything that is not an Exception subclass.
                spit(logfile, "[Error]")
                cherrypy.log("[Error]")
                error_info = current_error_text()
                cherrypy.log(error_info)
                spit(logfile, "[Error] {}\n".format(error_info))
            finally:
                if session is not None:
                    newman_email.close_session(session)

        except:
            # Last-resort handler, e.g. for failures while closing the session.
            error_info = current_error_text()
            cherrypy.log(error_info)
            spit(logfile, "[Error] {}\n".format(error_info))

    thr = threading.Thread(target=download_thread, args=())
    thr.start()
    tangelo.content_type("application/json")
    return {"id": user}
Example #3
0
def download(data):
    """Launch a worker thread that downloads a user's mail; return at once.

    ``data`` supplies ``user`` (required), ``pass`` and ``limit`` (default
    "2000"). The worker logs in with ``newman_email.login``, rebuilds the
    ``<webroot>/emails/<user>`` folder, writes the CSV header and runs
    ``newman_email.download``. Status and error lines are appended to
    ``<work_dir>/<user>.log``.

    Returns:
        ``{"id": user}`` on success, or ``tangelo.HTTPStatusCode(400, ...)``
        when no user was supplied.
    """
    user = data.get("user")

    if not user:
        return tangelo.HTTPStatusCode(400, "invalid service call missing user")

    passwd = data.get("pass")
    limit = data.get("limit", "2000")
    logfile = "{}/{}.log".format(work_dir, user)
    spit(logfile, "[Start] {}\n".format(user), True)
    cherrypy.log("logfile: {}".format(logfile))

    def report_unexpected_error():
        # sys.exc_info()[0] is a class and cannot be .replace()'d or logged
        # as text; format the type name and value into one line instead.
        exc_type, exc_value = sys.exc_info()[:2]
        error_info = "{}: {}".format(exc_type.__name__, exc_value).replace('\n', ' ')
        cherrypy.log(error_info)
        spit(logfile, "[Error] {}\n".format(error_info))

    def download_thread():
        try:
            cherrypy.log("Thread Start User: {}".format(user))

            # None until login succeeds, so cleanup never touches an
            # unbound name when login itself raises.
            session = None
            try:
                session = newman_email.login(user, passwd, logfile)
                fldr = "{}/emails/{}".format(webroot, user)
                cherrypy.log("Login User: {}".format(user))

                if os.path.exists(fldr):
                    rmrf(fldr)

                mkdir(fldr)

                spit("{}/output.csv".format(fldr), newman_email.headerrow() + "\n")

                mkdir(fldr + "/emails")

                newman_email.download(session, user, fldr, int(limit), logfile)

                spit(logfile, "[Completed Download] {}\n".format(user))
            except Exception as ex:
                spit(logfile, "[Error] {}\n".format(ex))
                cherrypy.log("[Error] {}\n".format(ex))
            except:
                # Non-Exception errors: keep the short marker, then the detail.
                spit(logfile, "[Error]")
                cherrypy.log("[Error]")
                report_unexpected_error()
            finally:
                if session is not None:
                    newman_email.close_session(session)

        except:
            # Catches anything raised by the handlers or the cleanup above.
            report_unexpected_error()

    thr = threading.Thread(target=download_thread, args=())
    thr.start()
    tangelo.content_type("application/json")
    return {"id": user}
Example #4
0
def extract_pst(*args, **kwargs):
    """Start a background ingest of a file and return the name of its log.

    Keyword arguments read from ``kwargs``:
        ingest-id: ingest identifier; ``index_prefix`` is prepended to it.
        file: the file to ingest.
        type: ingest type, defaults to "pst".

    A worker thread runs ``./bin/ingest.sh`` with its output redirected to
    ``<work_dir>/<logname>.ingester.log`` and writes progress to
    ``<work_dir>/<logname>.status.log``.

    Returns:
        ``{'log': logname}``.

    Raises:
        TypeError: if ingest-id, type or file is missing or empty.
    """
    cherrypy.log("search.extract_pst(kwargs[%s] %s)" % (len(kwargs), str(kwargs)))

    ingest_id = kwargs.get("ingest-id")
    ingest_file = kwargs.get("file")
    # Named ingest_type so the builtin ``type`` is not shadowed.
    ingest_type = kwargs.get("type", "pst")

    # path = "{}/{}".format(ingest_parent_dir, type)
    if not ingest_id or not ingest_type or not ingest_file:
        raise TypeError("Encountered a 'None' value for 'email', 'type', or 'ingest_file!'")

    # Add the prefix for the newman indexes
    ingest_id = index_prefix + ingest_id

    logname = "pst_{}".format(fmtNow())
    ingester_log = "{}/{}.ingester.log".format(work_dir, logname)
    # errfile = "{}/{}.err.log".format(work_dir, logname)
    service_status_log = "{}/{}.status.log".format(work_dir, logname)

    spit(service_status_log, "[Start] email address={}\n".format(ingest_id), True)

    def extract_thread():
        try:
            cmd = ["./bin/ingest.sh", ingest_id, ingest_parent_dir, ingest_file, ingest_type]
            cmd_line = " ".join(cmd)

            cherrypy.log("running pst: {}".format(cmd_line))
            spit(service_status_log, "[Running] {} \n".format(cmd_line))

            with open(ingester_log, 'w') as t:
                popen_kwargs = {'stdout': t, 'stderr': t, 'cwd': base_dir, 'bufsize': 1}
                subp = subprocess.Popen(cmd, **popen_kwargs)
                # Output is redirected to the log file; communicate() only
                # waits for the process to finish.
                subp.communicate()

                # TODO should never see this line  - remove this
                cherrypy.log("complete: {}".format(fmtNow()))

                rtn = subp.returncode
                if rtn != 0:
                    spit(service_status_log, "[Error] return with non-zero code: {} \n".format(rtn))
                else:
                    spit(service_status_log, "[Done Ingesting data.  Reloading the email_addr cache.]")
                    initialize_email_addr_cache(ingest_id, update=True)
                    spit(service_status_log, "[Complete.]")
        except:
            # Catch-all at the thread boundary so the failure reaches the
            # status log. sys.exc_info()[0] is a class without .replace(),
            # so format type name and value into a string first.
            exc_type, exc_value = sys.exc_info()[:2]
            error_info = "{}: {}".format(exc_type.__name__, exc_value)
            spit(service_status_log, "[Error] {}\n".format(error_info.replace('\n', ' ')))
            # cherrypy.log(error_info)

    thr = threading.Thread(target=extract_thread, args=())
    thr.start()
    tangelo.content_type("application/json")
    return {'log': logname}
Example #5
0
def login(username, passwd, log):
    """Open an IMAP4-over-SSL session to imap.gmail.com and log in.

    Args:
        username: account name passed to the IMAP login.
        passwd: password passed to the IMAP login.
        log: path of the status log that receives "[Error]" lines via spit.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` session.

    Raises:
        Exception: if the server response is not 'OK' or imaplib reports a
            login error.
    """
    try:
        _session = imaplib.IMAP4_SSL('imap.gmail.com')
        resp, account = _session.login(username, passwd)
        cherrypy.log('login {}'.format(resp))
        if resp != 'OK':
            # NOTE(review): this branch was a single syntactically invalid
            # line; rebuilt as log / status-log / raise to mirror the except
            # branch below - confirm against the upstream source.
            failure = "Failed login: {} {}".format(resp, account)
            cherrypy.log(failure)
            spit(log, "[Error] {}\n".format(failure))
            raise Exception(failure)
        return _session
    except imaplib.IMAP4.error:
        cherrypy.log("Failed login: {}".format(username))
        spit(log, "[Error] {}\n".format("Failed login: {}".format(username)))
        raise Exception("Exception Failed login: {}".format(username))
Example #6
0
def login(username, passwd, log):
    """Connect to imap.gmail.com over SSL and authenticate the given user.

    Args:
        username: account name used for the IMAP login.
        passwd: password used for the IMAP login.
        log: status-log path; failures are recorded there through spit.

    Returns:
        The authenticated ``imaplib.IMAP4_SSL`` session object.

    Raises:
        Exception: when the login response is not 'OK', or when imaplib
            raises ``IMAP4.error`` during login.
    """
    try:
        _session = imaplib.IMAP4_SSL('imap.gmail.com')
        resp, account = _session.login(username, passwd)
        cherrypy.log('login {}'.format(resp))
        if resp != 'OK':
            # NOTE(review): the original line here was mangled into invalid
            # syntax; the three statements below are a reconstruction that
            # follows the pattern of the except branch - please verify.
            message = "Failed login: {} {}".format(resp, account)
            cherrypy.log(message)
            spit(log, "[Error] {}\n".format(message))
            raise Exception(message)
        return _session
    except imaplib.IMAP4.error:
        cherrypy.log("Failed login: {}".format(username))
        spit(log, "[Error] {}\n".format("Failed login: {}".format(username)))
        raise Exception("Exception Failed login: {}".format(username))
Example #7
0
 def extract_thread():
     """Run ./bin/pstextract.sh and record the outcome in ``logfile``.

     stdout is redirected to ``teefile`` and stderr to ``errfile``; the
     subprocess runs with ``cwd=base_dir``. A non-zero return code or any
     exception is written to ``logfile`` as an "[Error]" line.
     """
     try:
         # Built inside the try so a bad argument (e.g. a None that cannot be
         # joined) is reported in the status log instead of killing the thread.
         cmd = ["./bin/pstextract.sh", email, pst_path]
         cmd_line = " ".join(cmd)
         cherrypy.log("running pst: {}".format(cmd_line))
         spit(logfile, "[Running] {} \n".format(cmd_line))
         with open(teefile, 'w') as t, open(errfile, 'w') as e:
             popen_kwargs = {
                 'stdout': t,
                 'stderr': e,
                 'cwd': base_dir,
                 'bufsize': 1
             }
             subp = subprocess.Popen(cmd, **popen_kwargs)
             # Output goes to the files above; this call only waits for exit.
             subp.communicate()
             cherrypy.log("complete: {}".format(fmtNow()))
             rtn = subp.returncode
             if rtn != 0:
                 spit(
                     logfile,
                     "[Error] return with non-zero code: {} \n".format(rtn))
             else:
                 spit(logfile, "[Complete]")
     except Exception as ex:
         # sys.exc_info()[0] is the exception class and has no .replace();
         # log a one-line "Type: message" string instead.
         error_info = "{}: {}".format(type(ex).__name__, ex).replace('\n', ' ')
         cherrypy.log(error_info)
         spit(logfile, "[Error] {}\n".format(error_info))
Example #8
0
def extract(email_id, buff_mail, out_dir, categories, target_email):
    """Unpack one raw email into ``<out_dir>/emails/<email_id>`` and build its row.

    Writes the raw message as ``<email_id>.eml``, saves every part that
    carries a Content-Disposition header as a file in the same directory,
    and writes the concatenated text/plain payloads as ``<email_id>.txt``.

    Args:
        email_id: identifier used for the directory and file names.
        buff_mail: the raw message text, parsed with email.message_from_string.
        out_dir: root output directory.
        categories: passed through to createRow.
        target_email: passed through to createRow.

    Returns:
        The value returned by createRow for this message.
    """
    _dir = "{}/emails/{}".format(out_dir, email_id)
    mkdirp(_dir)
    # write raw email to new dir
    spit("{}/{}.eml".format(_dir, email_id), buff_mail)
    mail = email.message_from_string(buff_mail)
    attach = []
    msg = ""
    attach_count = counter()

    for part in mail.walk():
        # text/plain parts feed the body text; they may still be saved as an
        # attachment below if they also carry a Content-Disposition header.
        if part.get_content_type() == 'text/plain':
            msg = msg + "\n" + part.get_payload()
        if part.get_content_type() == 'message/delivery-status':
            continue
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue

        fileName = part.get_filename()
        fileName = fileName if fileName else "Attach_{}".format(attach_count.next())

        if fileName == 'rtf-body.rtf':
            continue

        fileName = clean_string(fileName, [
            EXPR_OPTS['fix_utf8'],
            EXPR_OPTS['fix_forwardslash'],
            (r' ', '_'),
            (r'&', '_')])

        attach.append(fileName)
        filePath = "{}/{}".format(_dir, fileName)
        # save attachment; the with-block closes the handle even if the
        # write raises
        with open(filePath, 'wb') as fp:
            fp.write(part.get_payload(decode=True))

    msg = clean_string(msg, [EXPR_OPTS['fix_utf8']])
    spit("{}/{}.txt".format(_dir, email_id), msg)
    row = createRow(email_id, "emails/{}".format(email_id), target_email, mail, categories, attach, msg)

    return row
Example #9
0
def extract(email_id, buff_mail, out_dir, categories, target_email):
    """Write one email's raw text, body text and attachments; return its row.

    Output goes to ``<out_dir>/emails/<email_id>``: the raw message as
    ``<email_id>.eml``, the joined text/plain payloads as ``<email_id>.txt``
    and one file per part that has a Content-Disposition header.

    Args:
        email_id: identifier used in directory and file names.
        buff_mail: raw message text handed to email.message_from_string.
        out_dir: root output directory.
        categories: forwarded unchanged to createRow.
        target_email: forwarded unchanged to createRow.

    Returns:
        Whatever createRow returns for the parsed message.
    """
    _dir = "{}/emails/{}".format(out_dir, email_id)
    mkdirp(_dir)
    # write raw email to new dir
    spit("{}/{}.eml".format(_dir, email_id), buff_mail)
    mail = email.message_from_string(buff_mail)
    attach = []
    msg = ""
    attach_count = counter()

    for part in mail.walk():
        content_type = part.get_content_type()
        # Body text is collected first; a text/plain part can additionally be
        # stored as an attachment when it has a Content-Disposition header.
        if content_type == 'text/plain':
            msg = msg + "\n" + part.get_payload()
        if content_type == 'message/delivery-status':
            continue
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue

        fileName = part.get_filename()
        fileName = fileName if fileName else "Attach_{}".format(
            attach_count.next())

        if fileName == 'rtf-body.rtf':
            continue

        fileName = clean_string(fileName, [
            EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_forwardslash'], (r' ', '_'),
            (r'&', '_')
        ])

        attach.append(fileName)
        filePath = "{}/{}".format(_dir, fileName)
        # save attachment; using a context manager so the file is closed
        # even when writing the payload fails
        with open(filePath, 'wb') as fp:
            fp.write(part.get_payload(decode=True))

    msg = clean_string(msg, [EXPR_OPTS['fix_utf8']])
    spit("{}/{}.txt".format(_dir, email_id), msg)
    row = createRow(email_id, "emails/{}".format(email_id), target_email, mail,
                    categories, attach, msg)

    return row
Example #10
0
def extract_pst(*args, **kwargs):
    """Start a background PST extraction and return the name of its logs.

    Reads ``email`` and ``pst`` from ``kwargs``; the PST path is
    ``<pst_dir>/<pst>``. A worker thread runs ``./bin/pstextract.sh`` with
    stdout going to ``<logname>.tee.log``, stderr to ``<logname>.err.log``
    and status lines to ``<logname>.status.log``, all under ``work_dir``.

    Returns:
        ``{'log': logname}``.
    """
    email = kwargs.get("email")
    pst = kwargs.get("pst")
    pst_path = "{}/{}".format(pst_dir, pst)

    logname = "pst_{}".format(fmtNow())
    teefile = "{}/{}.tee.log".format(work_dir, logname)
    errfile = "{}/{}.err.log".format(work_dir, logname)
    logfile = "{}/{}.status.log".format(work_dir, logname)

    spit(logfile, "[Start] {}\n".format(email), True)

    def extract_thread():
        try:
            # Inside the try so that a missing ``email`` (None cannot be
            # joined) is written to the status log rather than lost.
            cmd = ["./bin/pstextract.sh", email, pst_path]
            cmd_line = " ".join(cmd)
            cherrypy.log("running pst: {}".format(cmd_line))
            spit(logfile, "[Running] {} \n".format(cmd_line))
            with open(teefile, 'w') as t, open(errfile, 'w') as e:
                popen_kwargs = {
                    'stdout': t,
                    'stderr': e,
                    'cwd': base_dir,
                    'bufsize': 1
                }
                subp = subprocess.Popen(cmd, **popen_kwargs)
                # Output is redirected to files; just wait for completion.
                subp.communicate()
                cherrypy.log("complete: {}".format(fmtNow()))
                rtn = subp.returncode
                if rtn != 0:
                    spit(
                        logfile,
                        "[Error] return with non-zero code: {} \n".format(rtn))
                else:
                    spit(logfile, "[Complete]")
        except Exception as ex:
            # sys.exc_info()[0] is a class (no .replace()); report a
            # single-line "Type: message" string instead.
            error_info = "{}: {}".format(type(ex).__name__, ex).replace('\n', ' ')
            cherrypy.log(error_info)
            spit(logfile, "[Error] {}\n".format(error_info))

    thr = threading.Thread(target=extract_thread, args=())
    thr.start()
    tangelo.content_type("application/json")
    return {'log': logname}
Example #11
0
def changeConfig(data):
    """Merge connection settings from ``data`` into conf/server.conf.

    Truthy values for target, database, host, user and password override
    what is already stored in ``<base_dir>/conf/server.conf``. The merged
    config is written back as sorted, indented JSON, and the resulting
    target is written to ``<base_dir>/conf/<filename>.cfg`` as an
    ``EMAIL_TARGET="..."`` line (``filename`` defaults to 'target').

    Returns:
        dict with the effective 'target' and the 'config' file name.
    """
    overrides = {}
    for key in ('target', 'database', 'host', 'user', 'password'):
        value = data.get(key, None)
        # Falsy values (missing, None, empty) leave the stored setting alone.
        if value:
            overrides[key] = value
    filename = data.get('filename', 'target')

    conf_path = "{}/conf/server.conf".format(base_dir)
    if os.path.isfile(conf_path):
        stored = json.loads(slurp(conf_path))
    else:
        stored = {}

    config = dict(stored, **overrides)
    serialized = json.dumps(config, sort_keys=True, indent=4, separators=(',', ': '))
    spit(conf_path, serialized, True)

    cfg_path = '{}/conf/{}.cfg'.format(base_dir, filename)
    spit(cfg_path, 'EMAIL_TARGET="{}"\n'.format(config["target"]), True)

    tangelo.content_type("application/json")
    return {'target': config["target"], 'config': filename + ".cfg"}
Example #12
0
def changeConfig(data):
    """Update the server configuration files from the request ``data``.

    Any of target, database, host, user and password that is truthy in
    ``data`` replaces the value stored in ``<base_dir>/conf/server.conf``;
    the merged settings are rewritten as sorted, indented JSON. The
    effective target is also written to ``<base_dir>/conf/<filename>.cfg``
    as ``EMAIL_TARGET="..."`` (``filename`` defaults to 'target').

    Returns:
        dict holding the effective 'target' and the 'config' file name.
    """
    setting_names = ['target', 'database', 'host', 'user', 'password']
    requested = [data.get(name, None) for name in setting_names]
    filename = data.get('filename', 'target')

    server_conf = "{}/conf/server.conf".format(base_dir)
    existing = {}
    if os.path.isfile(server_conf):
        existing = json.loads(slurp(server_conf))

    # Only truthy request values override what is already stored.
    supplied = dict((name, value)
                    for name, value in zip(setting_names, requested)
                    if value)
    config = dict(existing, **supplied)

    spit(server_conf,
         json.dumps(config, sort_keys=True, indent=4, separators=(',', ': ')),
         True)

    target_line = 'EMAIL_TARGET="{}"\n'.format(config["target"])
    spit('{}/conf/{}.cfg'.format(base_dir, filename), target_line, True)

    tangelo.content_type("application/json")
    return {'target': config["target"], 'config': filename + ".cfg"}
Example #13
0
    def extract_thread():
        """Execute ./bin/ingest.sh and write its progress to the status log.

        Both stdout and stderr of the subprocess are redirected into
        ``ingester_log`` and it runs with ``cwd=base_dir``. On a zero return
        code the email_addr cache is reloaded through
        ``initialize_email_addr_cache``; otherwise, or on any exception, an
        "[Error]" line is written to ``service_status_log``.
        """
        try:
            cmd = [
                "./bin/ingest.sh", ingest_id, ingest_parent_dir, ingest_file,
                type
            ]
            cmd_line = " ".join(cmd)

            cherrypy.log("running pst: {}".format(cmd_line))
            spit(service_status_log, "[Running] {} \n".format(cmd_line))

            with open(ingester_log, 'w') as t:
                popen_kwargs = {
                    'stdout': t,
                    'stderr': t,
                    'cwd': base_dir,
                    'bufsize': 1
                }
                subp = subprocess.Popen(cmd, **popen_kwargs)
                # All output lands in the log file; communicate() is only
                # used to block until the process exits.
                subp.communicate()

                # TODO should never see this line  - remove this
                cherrypy.log("complete: {}".format(fmtNow()))

                rtn = subp.returncode
                if rtn != 0:
                    spit(
                        service_status_log,
                        "[Error] return with non-zero code: {} \n".format(rtn))
                else:
                    spit(
                        service_status_log,
                        "[Done Ingesting data.  Reloading the email_addr cache.]"
                    )
                    initialize_email_addr_cache(ingest_id, update=True)
                    spit(service_status_log, "[Complete.]")
        except:
            # Intentional catch-all at the thread boundary. The first item of
            # sys.exc_info() is the exception class, which has no .replace(),
            # so a string is built from the type name and the value.
            exc_type, exc_value = sys.exc_info()[:2]
            error_info = "{}: {}".format(exc_type.__name__, exc_value)
            spit(service_status_log,
                 "[Error] {}\n".format(error_info.replace('\n', ' ')))
Example #14
0
def extract_pst(*args, **kwargs):
    """Run the PST extraction script in a worker thread; return the log name.

    ``kwargs`` supplies ``email`` and ``pst``; the script is given
    ``<pst_dir>/<pst>`` as the PST path. Under ``work_dir`` the worker
    writes stdout to ``<logname>.tee.log``, stderr to ``<logname>.err.log``
    and status lines to ``<logname>.status.log``.

    Returns:
        ``{'log': logname}``.
    """
    email = kwargs.get("email")
    pst = kwargs.get("pst")
    pst_path = "{}/{}".format(pst_dir, pst)

    logname = "pst_{}".format(fmtNow())
    teefile = "{}/{}.tee.log".format(work_dir, logname)
    errfile = "{}/{}.err.log".format(work_dir, logname)
    logfile = "{}/{}.status.log".format(work_dir, logname)

    spit(logfile, "[Start] {}\n".format(email), True)

    def extract_thread():
        try:
            # Kept inside the try: joining fails when ``email`` is None and
            # that failure should be visible in the status log.
            cmd = ["./bin/pstextract.sh", email, pst_path]
            cmd_line = " ".join(cmd)
            cherrypy.log("running pst: {}".format(cmd_line))
            spit(logfile, "[Running] {} \n".format(cmd_line))
            with open(teefile, 'w') as t, open(errfile, 'w') as e:
                popen_kwargs = {'stdout': t, 'stderr': e, 'cwd': base_dir, 'bufsize': 1}
                subp = subprocess.Popen(cmd, **popen_kwargs)
                # Output is captured by the files; only wait for exit here.
                subp.communicate()
                cherrypy.log("complete: {}".format(fmtNow()))
                rtn = subp.returncode
                if rtn != 0:
                    spit(logfile, "[Error] return with non-zero code: {} \n".format(rtn))
                else:
                    spit(logfile, "[Complete]")
        except Exception as ex:
            # The exception class from sys.exc_info()[0] has no .replace();
            # format the instance into one line of text instead.
            error_info = "{}: {}".format(type(ex).__name__, ex).replace('\n', ' ')
            cherrypy.log(error_info)
            spit(logfile, "[Error] {}\n".format(error_info))

    thr = threading.Thread(target=extract_thread, args=())
    thr.start()
    tangelo.content_type("application/json")
    return {'log': logname}
Example #15
0
 def extract_thread():
     """Run ./bin/pstextract.sh and write the result to ``logfile``.

     The subprocess runs with ``cwd=base_dir``, stdout redirected to
     ``teefile`` and stderr to ``errfile``. A non-zero return code or any
     exception produces an "[Error]" line in ``logfile``.
     """
     try:
         # Command construction lives inside the try so that a failure here
         # is reported in the status log too.
         cmd = ["./bin/pstextract.sh", email, pst_path]
         cmd_line = " ".join(cmd)
         cherrypy.log("running pst: {}".format(cmd_line))
         spit(logfile, "[Running] {} \n".format(cmd_line))
         with open(teefile, 'w') as t, open(errfile, 'w') as e:
             popen_kwargs = {'stdout': t, 'stderr': e, 'cwd': base_dir, 'bufsize': 1}
             subp = subprocess.Popen(cmd, **popen_kwargs)
             # Nothing is piped back; communicate() just waits for exit.
             subp.communicate()
             cherrypy.log("complete: {}".format(fmtNow()))
             rtn = subp.returncode
             if rtn != 0:
                 spit(logfile, "[Error] return with non-zero code: {} \n".format(rtn))
             else:
                 spit(logfile, "[Complete]")
     except Exception as ex:
         # sys.exc_info()[0] is the exception class, not text; build a
         # one-line description from the exception instance.
         error_info = "{}: {}".format(type(ex).__name__, ex).replace('\n', ' ')
         cherrypy.log(error_info)
         spit(logfile, "[Error] {}\n".format(error_info))
def flush_buffer(f, buffer):
    """Write the buffered lines to ``f`` via spit, newline-terminated.

    Does nothing when ``buffer`` is empty.
    """
    if len(buffer) == 0:
        return
    joined = "\n".join(buffer)
    spit(f, joined + "\n")
Example #17
0
def download(srv, target_email, outdir, limit, logfile):
    """Fetch messages from "[Gmail]/All Mail" and unpack them under ``outdir``.

    For each message this writes ``emails/<uid>/<uid>.eml`` (raw message),
    ``emails/<uid>/<uid>.txt`` (text/plain payloads with non-ASCII characters
    replaced by spaces), every part that has a Content-Disposition header as
    a file, and appends one row to ``<outdir>/output.csv``. A failure on a
    single message is logged to ``logfile`` and the loop moves on.

    Args:
        srv: mail session offering select/search/fetch
            (presumably an imaplib session - verify against caller).
        target_email: passed through to createRow.
        outdir: root output directory.
        limit: when > 0, only the last ``limit`` message ids are processed.
        logfile: path of the status log written through spit.

    Raises:
        Exception: if the initial search does not return 'OK'.
    """
    srv.select("[Gmail]/All Mail", True)
    #resp, data = srv.uid('SEARCH', None, 'ALL')
    resp, data = srv.search(None, 'ALL')

    if resp != 'OK':
        err_msg = "Error searching: %s %s" % (resp, data)
        spit(logfile, "[Error] {}\n".format(err_msg))
        raise Exception(err_msg)

    msgids = data[0].split()

    if limit > 0:
        msgids = msgids[-limit:]

    attach_count = counter()
    c = counter()
    total = len(msgids)
    for msgid in msgids:
        # Advance the counter before anything can fail, so the handler below
        # always reports the current message instead of hitting an unbound
        # (first iteration) or stale (later iterations) ``i``.
        i = c.next()
        try:
            uid = getUIDForMessage(srv, msgid)
            fldr = "emails/{}".format(uid)
            mkdir("{}/{}".format(outdir, fldr))

            if i % 200 == 0:
                spit(logfile, "[Downloading] Downloaded: {}/{}\n".format(i, total))

            resp, msgParts = srv.fetch(msgid, '(RFC822)')
            if resp != 'OK':
                err_msg = "Bad response: %s %s" % (resp, msgParts)
                spit(logfile, "[Error] {}\n".format(err_msg))
                raise Exception(err_msg)

            emailBody = msgParts[0][1]
            spit("{}/{}/{}.eml".format(outdir, fldr, uid), emailBody)
            mail = email.message_from_string(emailBody)
            attach = []
            msg = ""
            for part in mail.walk():
                if part.get_content_type() == 'text/plain':
                    msg = msg + "\n" + part.get_payload()
                if part.get_content_maintype() == 'multipart':
                    continue
                if part.get('Content-Disposition') is None:
                    continue

                fileName = part.get_filename()
                #escape file name
                fileName = fileName if fileName else "Attach_{}".format(attach_count.next())
                fileName = fileName.replace('/', '_')
                attach.append(fileName)
                filePath = "{}/{}/{}".format(outdir, fldr, fileName)

                # The with-block closes the file even if the write raises.
                with open(filePath, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))

            msg = re.sub(r'[^\x00-\x7F]', ' ', msg)
            spit("{}/{}/{}.txt".format(outdir, fldr, uid), msg)
            row = createRow(uid, fldr, target_email, mail, attach, msg)
            spit("{}/output.csv".format(outdir), row + "\n")
        except Exception as e:
            spit(logfile, "[Downloading] [Exception]: line {}, msgid {}, except {}\n".format(i, msgid, str(e)))
            continue
Example #18
0
    # Command-line driver: read one email file path per input line, extract
    # each email into out_dir and append its row to <out_dir>/output.csv.
    parser = argparse.ArgumentParser(
        description=" ... ",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=desc)
    parser.add_argument("-a", "--header", action='store_true', help="add header to output")
    parser.add_argument("-s", "--start", type=int, default=0, help="start at line #")
    parser.add_argument("-l", "--limit", type=int, default=0, help="end at line #")
    parser.add_argument("target_email", help="Target Email")
    parser.add_argument("out_dir", help="Output Directory")
    parser.add_argument("infile", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Input File")
    args = parser.parse_args()
    outfile = "{}/output.csv".format(args.out_dir)
    mkdirp("{}/emails".format(args.out_dir))
    if args.header:
        spit(outfile, email_extract.headerrow() + "\n")

    for i, line in enumerate(skip(args.infile, at_start=args.start)):
        # A limit of 0 means "no limit".
        if args.limit != 0 and i >= args.limit:
            break
        try:
            path = line.strip()
            guid = email_extract.md5(path)
            category = email_extract.categoryList(path)
            buff = slurp(path)

            row = email_extract.extract(guid, buff, args.out_dir, category, args.target_email)
            spit(outfile, row + "\n")
        except Exception as e:
            # Format the exception itself: ``e.message`` is empty for
            # exceptions built from several arguments (e.g. IOError), which
            # hid the actual cause. A failed line is reported and skipped.
            print("exception line: {} | {} ".format(i, e))
Example #19
0
def download(srv, target_email, outdir, limit, logfile):
    """Download messages from "[Gmail]/All Mail" into per-message folders.

    Each message gets ``<outdir>/emails/<uid>/`` containing the raw
    ``<uid>.eml``, a ``<uid>.txt`` with the text/plain payloads (non-ASCII
    characters replaced by spaces) and one file per part that has a
    Content-Disposition header. A row per message is appended to
    ``<outdir>/output.csv``. Errors on one message are written to
    ``logfile`` and processing continues with the next one.

    Args:
        srv: mail session providing select/search/fetch
            (presumably an imaplib session - verify against caller).
        target_email: forwarded to createRow.
        outdir: root output directory.
        limit: when > 0, restricts processing to the last ``limit`` ids.
        logfile: status-log path written through spit.

    Raises:
        Exception: if the initial search response is not 'OK'.
    """
    srv.select("[Gmail]/All Mail", True)
    #resp, data = srv.uid('SEARCH', None, 'ALL')
    resp, data = srv.search(None, 'ALL')

    if resp != 'OK':
        err_msg = "Error searching: %s %s" % (resp, data)
        spit(logfile, "[Error] {}\n".format(err_msg))
        raise Exception(err_msg)

    msgids = data[0].split()

    if limit > 0:
        msgids = msgids[-limit:]

    attach_count = counter()
    c = counter()
    msg_total = len(msgids)
    for msgid in msgids:
        # Take the next counter value up front: if an early step fails, the
        # except clause still has a valid, current ``i`` to log rather than
        # raising NameError or reporting the previous message's number.
        i = c.next()
        try:
            uid = getUIDForMessage(srv, msgid)
            fldr = "emails/{}".format(uid)
            mkdir("{}/{}".format(outdir, fldr))

            if i % 200 == 0:
                spit(logfile,
                     "[Downloading] Downloaded: {}/{}\n".format(i, msg_total))

            resp, msgParts = srv.fetch(msgid, '(RFC822)')
            if resp != 'OK':
                err_msg = "Bad response: %s %s" % (resp, msgParts)
                spit(logfile, "[Error] {}\n".format(err_msg))
                raise Exception(err_msg)

            emailBody = msgParts[0][1]
            spit("{}/{}/{}.eml".format(outdir, fldr, uid), emailBody)
            mail = email.message_from_string(emailBody)
            attach = []
            msg = ""
            for part in mail.walk():
                if part.get_content_type() == 'text/plain':
                    msg = msg + "\n" + part.get_payload()
                if part.get_content_maintype() == 'multipart':
                    continue
                if part.get('Content-Disposition') is None:
                    continue

                fileName = part.get_filename()
                #escape file name
                fileName = fileName if fileName else "Attach_{}".format(
                    attach_count.next())
                fileName = fileName.replace('/', '_')
                attach.append(fileName)
                filePath = "{}/{}/{}".format(outdir, fldr, fileName)

                # Context manager guarantees the handle is closed on error.
                with open(filePath, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))

            msg = re.sub(r'[^\x00-\x7F]', ' ', msg)
            spit("{}/{}/{}.txt".format(outdir, fldr, uid), msg)
            row = createRow(uid, fldr, target_email, mail, attach, msg)
            spit("{}/output.csv".format(outdir), row + "\n")
        except Exception as e:
            spit(
                logfile,
                "[Downloading] [Exception]: line {}, msgid {}, except {}\n".
                format(i, msgid, str(e)))
            continue
Example #20
0
    def download_thread():
        """Log in, rebuild the user's email folder and download the mail.

        Recreates ``<webroot>/emails/<user>``, writes the CSV header row,
        then calls ``newman_email.download``. Errors are written to both
        ``logfile`` and the cherrypy log; the session is closed afterwards
        if login produced one.
        """
        try:
            cherrypy.log("Thread Start User: {}".format(user))

            # Bound before the try so the finally clause can check whether
            # login succeeded instead of tripping over an unbound name.
            session = None
            try:
                session = newman_email.login(user, passwd, logfile)
                fldr = "{}/emails/{}".format(webroot, user)
                cherrypy.log("Login User: {}".format(user))

                if os.path.exists(fldr):
                    rmrf(fldr)

                mkdir(fldr)

                spit("{}/output.csv".format(fldr), newman_email.headerrow() + "\n")

                mkdir(fldr + "/emails")

                newman_email.download(session, user, fldr, int(limit), logfile)

                spit(logfile, "[Completed Download] {}\n".format(user))
            except Exception as ex:
                spit(logfile, "[Error] {}\n".format(ex))
                cherrypy.log("[Error] {}\n".format(ex))
            except:
                # Anything not derived from Exception. sys.exc_info()[0] is
                # the class (no .replace()), so format type name and value.
                spit(logfile, "[Error]")
                cherrypy.log("[Error]")
                exc_type, exc_value = sys.exc_info()[:2]
                error_info = "{}: {}".format(exc_type.__name__, exc_value).replace('\n', ' ')
                cherrypy.log(error_info)
                spit(logfile, "[Error] {}\n".format(error_info))

            finally:
                if session is not None:
                    newman_email.close_session(session)

        except:
            # Last line of defence, e.g. a failure while closing the session.
            exc_type, exc_value = sys.exc_info()[:2]
            error_info = "{}: {}".format(exc_type.__name__, exc_value).replace('\n', ' ')
            cherrypy.log(error_info)
            spit(logfile, "[Error] {}\n".format(error_info))
Example #21
0
            with execute_query(cnx.conn(), topic_stmt, _id) as qry:
                return [{
                    'name': formatName(nth(o, 0)),
                    'score': formatScore(nth(o, 1))
                } for o in qry.cursor()]

    # Render every row returned by ``stmt`` to an HTML file under
    # demail/emails/<target>/<dir>/, named after the last "/" segment of
    # the row id.
    with newman_connector() as read_cnx, \
            execute_query(read_cnx.conn(), stmt) as qry:
        for row in qry.cursor():
            (_id, _dir, _from, tos, ccs, bccs,
             subject, _date, attachments, body) = row

            outdir = "demail/emails/{}/{}".format(args.target, _dir)
            outfile = "{}/{}.html".format(outdir, last(split(_id, "/")))
            topics = getTopics(_id)

            # Recipient lists are stored ';'-separated; re-join them with
            # "; " for display.
            doc = {}
            doc['topics'] = topics
            doc['id'] = _id
            doc['from'] = _from
            doc['to'] = "; ".join(split(tos, ';'))
            doc['cc'] = "; ".join(split(ccs, ';'))
            doc['bcc'] = "; ".join(split(bccs, ';'))
            doc['subject'] = subject
            doc['date'] = _date
            doc['attachments'] = attachments
            doc['body'] = body

            html = T.render({'doc': doc})
            spit(outfile, html, True)
Example #22
0
    def ingest_thread():
        """Run ./bin/rebuild_all.sh and then ./bin/ingest.sh, logging progress.

        Both scripts run with ``cwd=base_dir``, stdout redirected to
        ``teefile`` and stderr to ``errfile``. If the rebuild exits with a
        non-zero code the ingest step is skipped. Status and error lines
        are written to ``logfile``.
        """
        cherrypy.log("Ingest Started:")
        try:
            cherrypy.log("started: {}".format(fmtNow()))
            spit(logfile, "[Started] {} \n".format(fmtNow()))

            rebuild_cmd = ["./bin/rebuild_all.sh"]
            cherrypy.log("running: {}".format(" ".join(rebuild_cmd)))
            spit(logfile, "[Running] {} \n".format(" ".join(rebuild_cmd)))

            with open(teefile, 'w') as t, open(errfile, 'w') as e:
                popen_kwargs = {'stdout': t, 'stderr': e, 'cwd': base_dir}
                rebuildp = subprocess.Popen(rebuild_cmd, **popen_kwargs)
                # Output goes to the files above; just wait for completion.
                rebuildp.communicate()
                cherrypy.log("rebuild complete: {}".format(fmtNow()))
                rtn = rebuildp.returncode
                if rtn != 0:
                    spit(
                        logfile,
                        "[Error] rebuild return with non-zero code: {} \n".
                        format(rtn))
                    return

            ingest_cmd = ["./bin/ingest.sh", cfg]
            cherrypy.log("running ingest: {}".format(" ".join(ingest_cmd)))
            spit(logfile, "[Running] {} \n".format(" ".join(ingest_cmd)))

            # NOTE(review): opening with 'w' again truncates the rebuild
            # step's output in teefile/errfile - confirm this is intended.
            with open(teefile, 'w') as t, open(errfile, 'w') as e:
                popen_kwargs = {
                    'stdout': t,
                    'stderr': e,
                    'cwd': base_dir,
                    'bufsize': 1
                }
                subp = subprocess.Popen(ingest_cmd, **popen_kwargs)
                subp.communicate()
                cherrypy.log("complete: {}".format(fmtNow()))
                rtn = subp.returncode
                if rtn != 0:
                    spit(
                        logfile,
                        "[Error] return with non-zero code: {} \n".format(rtn))
                else:
                    spit(logfile, "[Complete]")
        except:
            # Deliberate catch-all at the thread boundary. sys.exc_info()[0]
            # is the exception class (no .replace()), so build a one-line
            # "Type: message" string before logging.
            exc_type, exc_value = sys.exc_info()[:2]
            error_info = "{}: {}".format(exc_type.__name__, exc_value).replace('\n', ' ')
            cherrypy.log(error_info)
            spit(logfile, "[Error] {}\n".format(error_info))
Example #23
0
def extract_pst(*args, **kwargs):
    """Start a background ingest of an uploaded file and return its log name.

    Keyword arguments read from ``kwargs``:
        ingest-id: identifier of the ingest; ``index_prefix`` is prepended.
        file: name of the file to ingest.
        type: ingest type, defaults to ``"pst"``.

    The ingest script runs in a separate thread; its stdout and stderr are
    written to ``<work_dir>/<logname>.ingester.log`` and status lines to
    ``<work_dir>/<logname>.status.log``.

    Returns:
        dict: ``{'log': logname}`` identifying the log files of this run.

    Raises:
        TypeError: if ``ingest-id``, ``file`` or ``type`` is missing or empty.
    """
    cherrypy.log("search.extract_pst(kwargs[%s] %s)" %
                 (len(kwargs), str(kwargs)))

    ingest_id = kwargs.get("ingest-id")
    ingest_file = kwargs.get("file")
    # Named ``ingest_type`` so the builtin ``type`` is not shadowed.
    ingest_type = kwargs.get("type", "pst")

    if not ingest_id or not ingest_type or not ingest_file:
        raise TypeError(
            "Encountered a 'None' value for 'email', 'type', or 'ingest_file!'"
        )

    # Add the prefix for the newman indexes
    ingest_id = index_prefix + ingest_id

    logname = "pst_{}".format(fmtNow())
    ingester_log = "{}/{}.ingester.log".format(work_dir, logname)
    service_status_log = "{}/{}.status.log".format(work_dir, logname)

    spit(service_status_log, "[Start] email address={}\n".format(ingest_id),
         True)

    def extract_thread():
        """Run ``./bin/ingest.sh`` and record the outcome in the status log."""
        try:
            cmd = [
                "./bin/ingest.sh", ingest_id, ingest_parent_dir, ingest_file,
                ingest_type
            ]

            cherrypy.log("running pst: {}".format(" ".join(cmd)))
            spit(service_status_log, "[Running] {} \n".format(" ".join(cmd)))

            with open(ingester_log, 'w') as t:
                popen_kwargs = {
                    'stdout': t,
                    'stderr': t,
                    'cwd': base_dir,
                    'bufsize': 1
                }
                subp = subprocess.Popen(cmd, **popen_kwargs)
                # Output is redirected to the log file; communicate() is only
                # used to wait for the process to finish.
                subp.communicate()

                cherrypy.log("complete: {}".format(fmtNow()))

                rtn = subp.returncode
                if rtn != 0:
                    spit(
                        service_status_log,
                        "[Error] return with non-zero code: {} \n".format(rtn))
                else:
                    spit(
                        service_status_log,
                        "[Done Ingesting data.  Reloading the email_addr cache.]"
                    )
                    initialize_email_addr_cache(ingest_id, update=True)
                    spit(service_status_log, "[Complete.]")
        except:
            # Thread boundary: catch everything and log it.  The previous code
            # called ``.replace`` on sys.exc_info()[0], which is the exception
            # class and has no such method, so the handler itself failed.
            exc_type, exc_value = sys.exc_info()[:2]
            error_info = "{}: {}".format(exc_type.__name__,
                                         exc_value).replace('\n', ' ')
            spit(service_status_log, "[Error] {}\n".format(error_info))

    thr = threading.Thread(target=extract_thread, args=())
    thr.start()
    tangelo.content_type("application/json")
    return {'log': logname}
def flush_buffer(f, buffer):
    """Write the buffered lines to ``f`` via ``spit``, one per line.

    Nothing is written when ``buffer`` is empty.
    """
    if len(buffer) == 0:
        return
    joined = "\n".join(buffer)
    spit(f, joined + "\n")
Example #25
0
    def download_thread():
        """Log in as ``user``, download their mail and record progress.

        Recreates ``<webroot>/emails/<user>``, writes the CSV header row to
        ``output.csv`` there, then downloads up to ``int(limit)`` messages.
        Progress and errors are written to ``logfile`` via ``spit`` and to the
        cherrypy log.  Exceptions are logged, never propagated, because this
        function is the top of a worker thread.
        """
        try:
            cherrypy.log("Thread Start User: {}".format(user))

            # Bound before the try so the ``finally`` clause can tell whether
            # login succeeded; previously a failed login left ``session``
            # unbound and the cleanup raised NameError.
            session = None
            try:
                session = newman_email.login(user, passwd, logfile)
                fldr = "{}/emails/{}".format(webroot, user)
                cherrypy.log("Login User: {}".format(user))

                # Start from an empty per-user folder.
                if os.path.exists(fldr):
                    rmrf(fldr)

                mkdir(fldr)

                spit("{}/output.csv".format(fldr),
                     newman_email.headerrow() + "\n")

                mkdir(fldr + "/emails")

                newman_email.download(session, user, fldr, int(limit), logfile)

                spit(logfile, "[Completed Download] {}\n".format(user))
            except Exception as ex:
                spit(logfile, "[Error] {}\n".format(ex))
                cherrypy.log("[Error] {}\n".format(ex))
            except:
                # Anything that is not an ``Exception`` subclass.
                spit(logfile, "[Error]")
                cherrypy.log("[Error]")
                # sys.exc_info()[0] is the exception class and has no
                # ``replace``; format the class name and instance instead.
                exc_type, exc_value = sys.exc_info()[:2]
                error_info = "{}: {}".format(exc_type.__name__,
                                             exc_value).replace('\n', ' ')
                cherrypy.log(error_info)
                spit(logfile, "[Error] {}\n".format(error_info))
            finally:
                # Only close a session that was actually opened.
                if session is not None:
                    newman_email.close_session(session)

        except:
            # Last-resort handler, e.g. for failures raised during cleanup.
            exc_type, exc_value = sys.exc_info()[:2]
            error_info = "{}: {}".format(exc_type.__name__,
                                         exc_value).replace('\n', ' ')
            cherrypy.log(error_info)
            spit(logfile, "[Error] {}\n".format(error_info))
Example #26
0
        # Fragment of a per-line loop body: ``i``, ``part``, ``chunk``, ``line``
        # and ``args`` are set up outside this excerpt.

        # Every ``chunk`` records, switch to a new output part file whose name
        # ends in the zero-padded (6 digit) part number.
        if i % chunk == 0:
            part = part + 1
            outfile = "{}/{}.part_{}".format(args.out_dir, "ingest",
                                             str(part).zfill(6))

        _id, _dir, attachs, subject, body = processLine(line)
        attach_array = []

        # ``attachs`` is a ';'-separated list of attachment names; each one is
        # looked up under demail/emails/<email_addr>/<_dir>/.
        if attachs:
            for _i, a in enumerate(attachs.split(';')):
                attach_id = "attach_{}_{}".format(_id, _i)
                filePath = "demail/emails/{}/{}/{}".format(
                    args.email_addr, _dir, a)
                either = extractText(filePath)
                # Only attachments for which isRight(either) holds are kept;
                # presumably that marks a successful text extraction -- confirm.
                if isRight(either):
                    attach_json = toAttachItem(attach_id, _id, right(either),
                                               filePath)
                    attach_array.append(attach_json)

        index = {
            'id': _id,
            'subject': subject,
            'body': body,
            'attachments': attach_array
        }
        # Two JSON lines per record: an {"index": {"_id": ...}} action line
        # followed by the document itself.
        idx = json.dumps({"index": {"_id": _id}})
        doc = json.dumps(index)

        spit(outfile, "{}\n{}\n".format(idx, doc))
        i = i + 1
Example #27
0
                        "--limit",
                        type=int,
                        default=0,
                        help="end at line #")
    # Positional arguments: the target email, the output directory and an
    # optional input file (stdin when omitted).
    parser.add_argument("target_email", help="Target Email")
    parser.add_argument("out_dir", help="Output Directory")
    parser.add_argument("infile",
                        nargs='?',
                        type=argparse.FileType('r'),
                        default=sys.stdin,
                        help="Input File")
    args = parser.parse_args()
    outfile = "{}/output.csv".format(args.out_dir)
    mkdirp("{}/emails".format(args.out_dir))
    # Write the CSV header row only when requested.
    if args.header:
        spit(outfile, email_extract.headerrow() + "\n")

    # Each input line is a path to an email file.  ``skip`` is given
    # ``at_start=args.start``; presumably it drops the leading lines -- confirm.
    for i, line in enumerate(skip(args.infile, at_start=args.start)):
        # A limit of 0 means "no limit".
        if ((not args.limit == 0) and (i >= args.limit)):
            break
        try:
            fp = line.strip()
            guid = email_extract.md5(fp)
            category = email_extract.categoryList(fp)
            buff = slurp(fp)

            row = email_extract.extract(guid, buff, args.out_dir, category,
                                        args.target_email)
            spit(outfile, row + "\n")
        except Exception as e:
            # A failing line is reported and skipped; processing continues.
            print "exception line: {} | {} ".format(i, e.message)
Example #28
0
    def ingest_thread():
        """Run the rebuild script and then the ingest script, logging progress.

        Both scripts are started with ``base_dir`` as working directory, with
        stdout redirected to ``teefile`` and stderr to ``errfile``.  Status
        lines are written to ``logfile`` through ``spit`` and mirrored to the
        cherrypy log.  If the rebuild step exits non-zero the ingest step is
        skipped.  Any exception is logged rather than propagated, because this
        function is the top of a worker thread.
        """
        cherrypy.log("Ingest Started:")
        try:
            cherrypy.log("started: {}".format(fmtNow()))
            spit(logfile, "[Started] {} \n".format(fmtNow()))

            rebuild_cmd = ["./bin/rebuild_all.sh"]
            cherrypy.log("running: {}".format(" ".join(rebuild_cmd)))
            spit(logfile, "[Running] {} \n".format(" ".join(rebuild_cmd)))

            with open(teefile, 'w') as t, open(errfile, 'w') as e:
                popen_kwargs = {'stdout': t, 'stderr': e, 'cwd': base_dir}
                rebuildp = subprocess.Popen(rebuild_cmd, **popen_kwargs)
                # Output goes straight to the files, so communicate() is only
                # used to wait for the process to exit.
                rebuildp.communicate()
                cherrypy.log("rebuild complete: {}".format(fmtNow()))
                rtn = rebuildp.returncode
                if rtn != 0:
                    spit(logfile, "[Error] rebuild return with non-zero code: {} \n".format(rtn))
                    return

            ingest_cmd = ["./bin/ingest.sh", cfg]
            cherrypy.log("running ingest: {}".format(" ".join(ingest_cmd)))
            spit(logfile, "[Running] {} \n".format(" ".join(ingest_cmd)))

            # NOTE(review): reopening with 'w' truncates the rebuild output
            # captured above -- confirm that this is intended.
            with open(teefile, 'w') as t, open(errfile, 'w') as e:
                popen_kwargs = {'stdout': t, 'stderr': e, 'cwd': base_dir, 'bufsize': 1}
                subp = subprocess.Popen(ingest_cmd, **popen_kwargs)
                subp.communicate()
                cherrypy.log("complete: {}".format(fmtNow()))
                rtn = subp.returncode
                if rtn != 0:
                    spit(logfile, "[Error] return with non-zero code: {} \n".format(rtn))
                else:
                    spit(logfile, "[Complete]")
        except:
            # Thread boundary: nothing above this frame reports failures, so
            # everything is caught and logged here.  sys.exc_info()[0] is the
            # exception *class* (it has no ``replace``), so the message is
            # built from the class name and the exception instance instead.
            exc_type, exc_value = sys.exc_info()[:2]
            error_info = "{}: {}".format(exc_type.__name__, exc_value).replace('\n', ' ')
            cherrypy.log(error_info)
            spit(logfile, "[Error] {}\n".format(error_info))
Example #29
0
    def getTopics(_id):
        """Return the rows of ``topic_stmt`` for ``_id`` as name/score dicts.

        Column 0 of each row is passed through ``formatName`` and column 1
        through ``formatScore``.
        """
        with newman_connector() as connection:
            with execute_query(connection.conn(), topic_stmt, _id) as query:
                topics = []
                for record in query.cursor():
                    name = formatName(nth(record, 0))
                    score = formatScore(nth(record, 1))
                    topics.append({'name': name, 'score': score})
                return topics

    # Render every row returned by ``stmt`` to an HTML file under
    # demail/emails/<target>/<_dir>/, named after the last '/'-separated
    # component of the row id.
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt) as qry:
            for row in qry.cursor():
                _id, _dir, _from, tos, ccs, bccs, subject, _date, attachments, body = row
                outdir = "demail/emails/{}/{}".format(args.target, _dir)
                outfile = "{}/{}.html".format(outdir, last(split(_id, "/")))
                # getTopics opens its own connection for the lookup.
                topics = getTopics(_id)

                # Template context; the ';'-separated recipient lists are
                # re-joined with "; " for display.
                o = { 'doc': 
                      {
                          'topics' : topics,
                          'id': _id,
                          'from': _from,
                          'to' : "; ".join(split(tos, ';')),
                          'cc' : "; ".join(split(ccs, ';')),
                          'bcc': "; ".join(split(bccs, ';')),
                          'subject': subject,
                          'date': _date,
                          'attachments': attachments,
                          'body' : body
                      }}
                html= T.render(o)
                # NOTE(review): the third argument presumably tells spit to
                # overwrite rather than append -- confirm against spit().
                spit(outfile, html, True)

Example #30
0
    # Convert each input line into a two-line JSON record, split across
    # numbered part files.  ``i``, ``part`` and ``chunk`` are initialised
    # outside this excerpt.
    for line in args.infile:
        
        # Progress report every 100 records.
        if i % 100 == 0:
            print "processed: {} ".format(i)

        # Every ``chunk`` records, switch to a new output part file whose name
        # ends in the zero-padded (6 digit) part number.
        if i % chunk == 0:
            part= part + 1
            outfile = "{}/{}.part_{}".format(args.out_dir, "ingest", str(part).zfill(6))

        _id, _dir, attachs, subject, body= processLine(line)
        attach_array = []

        # ``attachs`` is a ';'-separated list of attachment names; each one is
        # looked up under demail/emails/<email_addr>/<_dir>/.
        if attachs:
            for _i, a in enumerate(attachs.split(';')):
                attach_id= "attach_{}_{}".format(_id, _i)
                filePath ="demail/emails/{}/{}/{}".format(args.email_addr, _dir, a)
                either = extractText(filePath)
                # Only attachments for which isRight(either) holds are kept;
                # presumably that marks a successful text extraction -- confirm.
                if isRight(either):
                    attach_json = toAttachItem(attach_id, _id, right(either), filePath)
                    attach_array.append(attach_json)

        index = { 'id' : _id, 'subject': subject, 'body' : body, 'attachments' : attach_array }
        # Two JSON lines per record: an {"index": {"_id": ...}} action line
        # followed by the document itself.
        idx = json.dumps({ "index": { "_id" : _id }})
        doc = json.dumps(index)

        spit(outfile, "{}\n{}\n".format(idx,doc))
        i=i+1