Example #1
0
def writeRanks(ids):
    """Record a "rank" fact for each row returned by the module-level ``stmt``.

    ``ids`` is looked up with the first column of every row; keys that are
    missing are written with the rank '0'.  Three separate connections are
    used: one to run the query, one to obtain the transaction id and one to
    write the facts.  The write connection is committed once, after the loop.
    """
    with newman_connector() as query_cnx, \
            newman_connector() as tx_cnx, \
            newman_connector() as fact_cnx:
        with execute_query(query_cnx.conn(), stmt) as qry:
            txid = Tx(tx_cnx.conn()).next()
            print("tx: %s" % txid)
            fact_writer = Fact(fact_cnx.conn(), autocommit=False)
            print("assigning ranks")
            for row in qry.cursor():
                addr = row[0]
                rank = ids.get(addr, '0')
                fact_writer.addFact(addr, "email_addr", "rank", rank, txid)

            print("commit")
            fact_cnx.commit()
Example #2
0
def writeRanks(ids):
    """Record a "rank" fact for each row returned by the module-level ``stmt``.

    ``ids`` is looked up with the first column of every row; keys that are
    missing are written with the rank '0'.  Three separate connections are
    used: one to run the query, one to obtain the transaction id and one to
    write the facts.  The write connection is committed once, after the loop.
    """
    with newman_connector() as query_cnx, \
            newman_connector() as tx_cnx, \
            newman_connector() as fact_cnx:
        with execute_query(query_cnx.conn(), stmt) as qry:
            txid = Tx(tx_cnx.conn()).next()
            print("tx: %s" % txid)
            fact_writer = Fact(fact_cnx.conn(), autocommit=False)
            print("assigning ranks")
            for row in qry.cursor():
                addr = row[0]
                rank = ids.get(addr, '0')
                fact_writer.addFact(addr, "email_addr", "rank", rank, txid)

            print("commit")
            fact_cnx.commit()
Example #3
0
def queryEntity(email):
    """Run ``stmt_email_entities_by_id`` for *email* and return all rows.

    The executed statement is logged.  The result is always a list; it is
    empty when the cursor yields nothing.
    """
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt_email_entities_by_id,
                           email) as result:
            tangelo.log("node-vals: %s" % result.stmt)
            return list(result.cursor())
Example #4
0
def queryEmail(email):
    """Fetch the first row of ``stmt_email_by_id`` for *email*.

    Logs the executed statement and sets the response content type to JSON.
    Returns the fetched row, or an empty list when ``fetchone()`` produces
    nothing (or a falsy row).
    """
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt_email_by_id, email) as result:
            tangelo.log("node-vals: %s" % result.stmt)
            row = result.cursor().fetchone()
            tangelo.content_type("application/json")
            if row:
                return row
            return []
Example #5
0
 def getTopics(_id):
     """Return one {'name', 'score'} dict per row of ``topic_stmt`` for *_id*.

     Column 0 of each row goes through ``formatName`` and column 1 through
     ``formatScore``.
     """
     with newman_connector() as cnx:
         with execute_query(cnx.conn(), topic_stmt, _id) as qry:
             topics = []
             for row in qry.cursor():
                 topics.append({
                     'name': formatName(nth(row, 0)),
                     'score': formatScore(nth(row, 1)),
                 })
             return topics
Example #6
0
def getExportable(*args):
    """List id and subject of every email whose exportable column is 'true'.

    Positional arguments are accepted but ignored.  The response content
    type is set to JSON and every column value is converted with ``str``.
    Returns ``{"emails": [[id, subject], ...]}``.
    """
    tangelo.content_type("application/json")
    stmt = (" SELECT id, subject FROM email WHERE exportable='true' ")
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt) as qry:
            emails = []
            for row in qry.cursor():
                emails.append([str(col) for col in row])
            return {"emails": emails}
Example #7
0
def queryEmail(email):
    """Fetch the first row of ``stmt_email_by_id`` for *email*.

    Logs the executed statement and sets the response content type to JSON.
    Returns the fetched row, or an empty list when ``fetchone()`` produces
    nothing (or a falsy row).
    """
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt_email_by_id, email) as result:
            tangelo.log("node-vals: %s" % result.stmt)
            row = result.cursor().fetchone()
            tangelo.content_type("application/json")
            if row:
                return row
            return []
Example #8
0
def getEdges(node_idx, field, args_array):
    """Build the edge list for the query described by *field* / *args_array*.

    ``edgeQueryObj`` supplies the arguments for ``execute_query``.  Each
    result row is unpacked as (from, to, weight); the first two values are
    looked up in *node_idx* (presumably address -> node index) and the
    weight is converted to ``int``.

    Returns a list of ``{"source", "target", "value"}`` dicts.
    """
    with newman_connector() as cnx:
        tangelo.log("start edge query")
        query_args = edgeQueryObj(cnx.conn(), field, args_array)
        with execute_query(*query_args) as qry:
            tangelo.log("edges : %s" % qry.stmt)
            edges = []
            for src, dst, weight in qry.cursor():
                edges.append({
                    "source": node_idx.get(src),
                    "target": node_idx.get(dst),
                    "value": int(weight),
                })
            return edges
Example #9
0
def setExportable(data):
    """Update the exportable flag of one email, or reset it for all emails.

    *data* is read for the keys 'email' (the id) and 'exportable' (default
    'false').  Without an email id every row of ``email`` gets
    exportable='false' and ``{"success": "true"}`` is returned.  With an id
    only that row is updated and ``{"email": queryEmail(id)}`` is returned.
    The response content type is JSON in both cases.
    """
    email = data.get('email')
    exportable = data.get('exportable', 'false')
    tangelo.content_type("application/json")

    if not email:
        # No id supplied: clear the flag on every email.
        reset_all = (" UPDATE email SET exportable='false' ")
        with newman_connector() as cnx:
            with execute_nonquery(cnx.conn(), reset_all) as _:
                return {"success": "true"}

    update_one = (" UPDATE email SET exportable= %s WHERE id = %s ")
    with newman_connector() as cnx:
        with execute_nonquery(cnx.conn(), update_one, exportable, email) as _:
            # The re-read happens while the update's context is still open.
            return {"email": queryEmail(email)}
Example #10
0
def getExportable(*args):
    """List id and subject of every email whose exportable column is 'true'.

    Positional arguments are accepted but ignored.  The response content
    type is set to JSON and every column value is converted with ``str``.
    Returns ``{"emails": [[id, subject], ...]}``.
    """
    tangelo.content_type("application/json")
    stmt = (" SELECT id, subject FROM email WHERE exportable='true' ")
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt) as qry:
            emails = []
            for row in qry.cursor():
                emails.append([str(col) for col in row])
            return {"emails": emails}
Example #11
0
def getDomains(*args):
    """Count email addresses per domain (the text after the last '@').

    Positional arguments are accepted but ignored.  The response content
    type is set to JSON and every column value is converted with ``str``.
    Returns ``{"domains": [[domain, count], ...]}``.
    """
    tangelo.content_type("application/json")
    stmt = (
        "SELECT SUBSTRING_INDEX(email_addr, '@', -1) as eml, count(1) from email_addr group by eml"
    )
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt) as qry:
            domains = []
            for row in qry.cursor():
                domains.append([str(col) for col in row])
            return {"domains": domains}
Example #12
0
def getDomains(*args):
    """Count email addresses per domain (the text after the last '@').

    Positional arguments are accepted but ignored.  The response content
    type is set to JSON and every column value is converted with ``str``.
    Returns ``{"domains": [[domain, count], ...]}``.
    """
    tangelo.content_type("application/json")
    stmt = (
        "SELECT SUBSTRING_INDEX(email_addr, '@', -1) as eml, count(1) from email_addr group by eml"
    )
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt) as qry:
            domains = []
            for row in qry.cursor():
                domains.append([str(col) for col in row])
            return {"domains": domains}
Example #13
0
def getNodeVals(field, args_array):
    """
    Build the node lookup for the query described by field / args_array.

    Intent, per the original author: the nodes are all of the emails an
    email addr is a part of, and then all of the email addrs associated
    with that set of emails.

    Returns a dict keyed by column 0 of each row.  Each value holds 'num'
    (int of column 4 + column 5), 'comm_id' (column 2), 'group_id'
    (column 3), 'comm' (column 1) and 'rank' (column 6).
    """
    with newman_connector() as cnx:
        tangelo.log("start node query")
        query_args = nodeQueryObj(cnx.conn(), field, args_array)
        with execute_query(*query_args) as qry:
            tangelo.log("node-vals: %s" % qry.stmt)
            nodes = {}
            for row in qry.cursor():
                nodes[row[0]] = {
                    'num': int(row[4] + row[5]),
                    'comm_id': row[2],
                    'group_id': row[3],
                    'comm': row[1],
                    'rank': row[6],
                }
            return nodes
Example #14
0
def setExportMany(data):
    """Set the exportable flag on every email id listed in ``data['emails']``.

    The flag is 'true' when ``data['exportable']`` is truthy (or absent) and
    'false' otherwise.  One UPDATE is issued per id over a single
    connection.  Sets the response content type to JSON and returns
    ``{'exported': <the list of ids>}``.
    """
    emails = data.get('emails', [])
    if data.get('exportable', True):
        flag = 'true'
    else:
        flag = 'false'
    stmt = (" UPDATE email SET exportable=%s WHERE id = %s ")
    with newman_connector() as cnx:
        for email_id in emails:
            with execute_nonquery(cnx.conn(), stmt, flag, email_id) as _:
                pass
    tangelo.content_type("application/json")
    return {'exported': emails}
Example #15
0
def topic_list(*args):
    """Return the topic_category rows (idx, value, docs) for one category.

    The category id is the first positional argument and defaults to 'all'.
    Rows are ordered by idx.  Sets the response content type to JSON and
    returns ``{"categories": [row, ...]}``.
    """
    category = nth(args, 0, 'all')
    stmt = (" select idx, value, docs from topic_category "
            " where category_id = %s "
            " order by idx ")
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt, category) as qry:
            categories = list(qry.cursor())
            tangelo.content_type("application/json")
            return {"categories": categories}
Example #16
0
def getRollup(*args):
    """Look up the rollup row for the entity named by the first argument.

    The argument is URL-unquoted; when it is missing or empty an HTTP 400
    status object is returned.  Otherwise the first row produced by
    ``stmt_entity_rollup_id`` is returned as ``{"rollupId": row}`` and the
    response content type is set to JSON.
    """
    entity = urllib.unquote(nth(args, 0, ""))
    if not entity:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt_entity_rollup_id, entity) as qry:
            rollup_row = qry.cursor().fetchone()
            tangelo.content_type("application/json")
            return {"rollupId": rollup_row}
Example #17
0
def getRollup(*args):
    """Look up the rollup row for the entity named by the first argument.

    The argument is URL-unquoted; when it is missing or empty an HTTP 400
    status object is returned.  Otherwise the first row produced by
    ``stmt_entity_rollup_id`` is returned as ``{"rollupId": row}`` and the
    response content type is set to JSON.
    """
    entity = urllib.unquote(nth(args, 0, ''))
    if not entity:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt_entity_rollup_id, entity) as qry:
            rollup_row = qry.cursor().fetchone()
            tangelo.content_type("application/json")
            return {"rollupId": rollup_row}
Example #18
0
def ingestESTextResults(hits):
    """Replace the contents of ``search_results`` with the ids of *hits*.

    All existing rows are deleted first, then the ``"_id"`` of every hit is
    inserted as ``email_id``.  Everything runs on one connection, which is
    committed once at the end.
    """
    clear_stmt = ("delete from search_results")
    insert_stmt = ("insert into search_results (email_id) values (%s)")

    with newman_connector() as cnx:
        with execute_query(cnx.conn(), clear_stmt) as _:
            pass
        for hit in hits:
            with execute_query(cnx.conn(), insert_stmt, hit["_id"]) as _:
                pass

        cnx.commit()
Example #19
0
def getRankedEmails(*args):
    """List every email address with rank > 0, highest rank first.

    Positional arguments are accepted but ignored.  The response content
    type is set to JSON and every column value is converted with ``str``.
    Returns ``{"emails": [row, ...]}`` with the columns email_addr,
    community, community_id, group_id, rank, total_received, total_sent.
    """
    tangelo.content_type("application/json")
    stmt = (
        " select email_addr, community, community_id, group_id, rank, total_received, total_sent "
        " from email_addr "
        " where rank > 0 "
        " order by cast(rank as decimal(4,4)) desc")
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt) as qry:
            ranked = []
            for row in qry.cursor():
                ranked.append([str(col) for col in row])
            return {"emails": ranked}
Example #20
0
def getTopRollup(*args):
    """Return the top rolled-up entities, limited to the requested count.

    The first positional argument is the URL-encoded row limit.  Because it
    becomes part of the SQL text it must be a non-negative integer; a
    missing, non-numeric or negative value is answered with an HTTP 400
    status object instead of being spliced into the statement.

    On success the response content type is set to JSON and
    ``{"entities": [row, ...]}`` is returned (an empty list when there are
    no rows).
    """
    amt = urllib.unquote(nth(args, 0, ''))
    if not amt:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    # amt comes from the request; never format it into SQL unchecked.
    try:
        limit = int(amt)
    except ValueError:
        limit = -1
    if limit < 0:
        return tangelo.HTTPStatusCode(
            400,
            "invalid service call - amount must be a non-negative integer")

    stmt = stmt_top_rollup_entities + ("limit {0}".format(limit))
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt) as qry:
            rtn = list(qry.cursor())
            tangelo.content_type("application/json")
            return {"entities": rtn}
Example #21
0
def getTopRollup(*args):
    """Return the top rolled-up entities, limited to the requested count.

    The first positional argument is the URL-encoded row limit.  Because it
    becomes part of the SQL text it must be a non-negative integer; a
    missing, non-numeric or negative value is answered with an HTTP 400
    status object instead of being spliced into the statement.

    On success the response content type is set to JSON and
    ``{"entities": [row, ...]}`` is returned (an empty list when there are
    no rows).
    """
    amt = urllib.unquote(nth(args, 0, ""))
    if not amt:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    # amt comes from the request; never format it into SQL unchecked.
    try:
        limit = int(amt)
    except ValueError:
        limit = -1
    if limit < 0:
        return tangelo.HTTPStatusCode(
            400,
            "invalid service call - amount must be a non-negative integer")

    stmt = stmt_top_rollup_entities + ("limit {0}".format(limit))
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt) as qry:
            rtn = list(qry.cursor())
            tangelo.content_type("application/json")
            return {"entities": rtn}
Example #22
0
def topic_list(*args):
    """Return the topic_category rows (idx, value, docs) for one category.

    The category id is the first positional argument and defaults to 'all'.
    Rows are ordered by idx.  Sets the response content type to JSON and
    returns ``{"categories": [row, ...]}``.
    """
    category = nth(args, 0, 'all')
    stmt = (" select idx, value, docs from topic_category "
            " where category_id = %s "
            " order by idx ")
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt, category) as qry:
            categories = list(qry.cursor())
            tangelo.content_type("application/json")
            return {"categories": categories}
Example #23
0
def getRankedEmails(*args):
    """List every email address with rank > 0, highest rank first.

    Positional arguments are accepted but ignored.  The response content
    type is set to JSON and every column value is converted with ``str``.
    Returns ``{"emails": [row, ...]}`` with the columns email_addr,
    community, community_id, group_id, rank, total_received, total_sent.
    """
    tangelo.content_type("application/json")
    stmt = (
        " select email_addr, community, community_id, group_id, rank, total_received, total_sent "
        " from email_addr "
        " where rank > 0 "
        " order by cast(rank as decimal(4,4)) desc")
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt) as qry:
            ranked = []
            for row in qry.cursor():
                ranked.append([str(col) for col in row])
            return {"emails": ranked}
Example #24
0
def setExportable(data):
    """Update the exportable flag of one email, or reset it for all emails.

    *data* is read for the keys 'email' (the id) and 'exportable' (default
    'false').  Without an email id every row of ``email`` gets
    exportable='false' and ``{"success": "true"}`` is returned.  With an id
    only that row is updated and ``{"email": queryEmail(id)}`` is returned.
    The response content type is JSON in both cases.
    """
    email = data.get('email')
    exportable = data.get('exportable', 'false')
    tangelo.content_type("application/json")

    if not email:
        # No id supplied: clear the flag on every email.
        reset_all = (" UPDATE email SET exportable='false' ")
        with newman_connector() as cnx:
            with execute_nonquery(cnx.conn(), reset_all) as _:
                return {"success": "true"}

    update_one = (" UPDATE email SET exportable= %s WHERE id = %s ")
    with newman_connector() as cnx:
        with execute_nonquery(cnx.conn(), update_one, exportable, email) as _:
            # The re-read happens while the update's context is still open.
            return {"email": queryEmail(email)}
Example #25
0
def getEmails(colors, field, args_array):
    """Return the emails matching *field* / *args_array* as a list of dicts.

    ``emailQueryObj`` supplies the arguments for ``execute_query``.  Each
    result row is zipped with a fixed tuple of column names, and the entry
    'fromcolor' is added by looking up the row's 'from' value in *colors*.
    """
    column_names = ('num', 'directory', 'datetime', 'from', 'to', 'cc',
                    'bcc', 'subject', 'attach', 'bodysize')
    emails = []
    with newman_connector() as cnx:
        tangelo.log("start email query")
        query_args = emailQueryObj(cnx.conn(), field, args_array)
        with execute_query(*query_args) as qry:
            tangelo.log("emails : %s" % qry.stmt)
            for record in qry.cursor():
                entry = dict(zip(column_names, record))
                sender = entry.get('from')
                entry["fromcolor"] = colors.get(sender)
                emails.append(entry)
    return emails
Example #26
0
def setExportMany(data):
    """Set the exportable flag on every email id listed in ``data['emails']``.

    The flag is 'true' when ``data['exportable']`` is truthy (or absent) and
    'false' otherwise.  One UPDATE is issued per id over a single
    connection.  Sets the response content type to JSON and returns
    ``{'exported': <the list of ids>}``.
    """
    emails = data.get('emails', [])
    if data.get('exportable', True):
        flag = 'true'
    else:
        flag = 'false'
    stmt = (" UPDATE email SET exportable=%s WHERE id = %s ")
    with newman_connector() as cnx:
        for email_id in emails:
            with execute_nonquery(cnx.conn(), stmt, flag, email_id) as _:
                pass
    tangelo.content_type("application/json")
    return {'exported': emails}
Example #27
0
def getTarget(*args):
    """Return the email_addr row(s) of the user whose email is being analyzed.

    The address comes from ``getOpt('target')``.  Positional arguments are
    accepted but ignored.  Sets the response content type to JSON and
    returns ``{"email": [row, ...]}`` with every value converted by ``str``.
    """
    # TODO: read the target from a file or config.
    target = getOpt('target')
    stmt = (
        " select e.email_addr, e.community, e.community_id, e.group_id, e.total_received, e.total_sent, e.rank "
        " from email_addr e "
        " where e.email_addr = %s ")
    tangelo.content_type("application/json")
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt, target) as qry:
            matches = []
            for row in qry.cursor():
                matches.append([str(col) for col in row])
            return {"email": matches}
Example #28
0
def getTarget(*args):
    """Return the email_addr row(s) of the user whose email is being analyzed.

    The address comes from ``getOpt('target')``.  Positional arguments are
    accepted but ignored.  Sets the response content type to JSON and
    returns ``{"email": [row, ...]}`` with every value converted by ``str``.
    """
    # TODO: read the target from a file or config.
    target = getOpt('target')
    stmt = (
        " select e.email_addr, e.community, e.community_id, e.group_id, e.total_received, e.total_sent, e.rank "
        " from email_addr e "
        " where e.email_addr = %s ")
    tangelo.content_type("application/json")
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt, target) as qry:
            matches = []
            for row in qry.cursor():
                matches.append([str(col) for col in row])
            return {"email": matches}
Example #29
0
def getAttachmentsSender(*args):
    """List the emails with attachments that were sent by a given address.

    The sender is the URL-unquoted first positional argument; when it is
    missing or empty an HTTP 400 status object is returned.  String values
    are UTF-8 encoded and all other values are converted with ``str``.

    Returns ``{"sender": sender, "email_attachments": [row, ...]}`` and sets
    the response content type to JSON.
    """
    sender = urllib.unquote(nth(args, 0, ''))
    if not sender:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    tangelo.content_type("application/json")
    stmt = (
        " select id, dir, datetime, from_addr, tos, ccs, bccs, subject, attach, bodysize "
        " from email "
        " where from_addr = %s and attach != '' ")
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt, sender) as qry:
            attachments = []
            for row in qry.cursor():
                cells = []
                for val in row:
                    if isinstance(val, basestring):
                        cells.append(val.encode('utf-8'))
                    else:
                        cells.append(str(val))
                attachments.append(cells)
            return {"sender": sender, "email_attachments": attachments}
Example #30
0
def getExploded():
    """Write one tab-separated line per recipient to ./tmp/exploded.csv.

    Each row of the module-level ``stmt`` is unpacked as
    (datetime, sender, to, cc, bcc).  The to, cc and bcc values are split on
    ';' and every non-empty piece produces a line
    ``datetime<TAB>sender<TAB>recipient`` with the recipient stripped of
    surrounding whitespace.

    The output file is managed by a ``with`` block so it is closed even when
    the query or a write raises.
    """
    with open('./tmp/exploded.csv', 'w') as output:
        with newman_connector() as read_cnx:
            with execute_query(read_cnx.conn(), stmt) as qry:
                for dt, frome, to, cc, bcc in qry.cursor():
                    # Same treatment for all three recipient columns,
                    # in the order to, cc, bcc.
                    for recipients in (to, cc, bcc):
                        for r in recipients.split(';'):
                            if r:
                                output.write(
                                    '\t'.join((dt, frome, r.strip())) + '\n')
Example #31
0
def getExploded():
    """Write one tab-separated line per recipient to ./tmp/exploded.csv.

    Each row of the module-level ``stmt`` is unpacked as
    (datetime, sender, to, cc, bcc).  The to, cc and bcc values are split on
    ';' and every non-empty piece produces a line
    ``datetime<TAB>sender<TAB>recipient`` with the recipient stripped of
    surrounding whitespace.

    The output file is managed by a ``with`` block so it is closed even when
    the query or a write raises.
    """
    with open('./tmp/exploded.csv', 'w') as output:
        with newman_connector() as read_cnx:
            with execute_query(read_cnx.conn(), stmt) as qry:
                for dt, frome, to, cc, bcc in qry.cursor():
                    # Same treatment for all three recipient columns,
                    # in the order to, cc, bcc.
                    for recipients in (to, cc, bcc):
                        for r in recipients.split(';'):
                            if r:
                                output.write(
                                    '\t'.join((dt, frome, r.strip())) + '\n')
Example #32
0
def email_scores(*args):
    """Return the topic scores of one email within one category.

    Positional arguments: the URL-quoted email id, then an optional category
    id (default 'all').  A missing or empty email id yields an HTTP 400
    status object.  Scores are read from ``xref_email_topic_score`` ordered
    by idx, and ``head`` is applied to every row.

    Returns ``{"scores": [...], "email": email_id, "category": category}``
    and sets the response content type to JSON.
    """
    email_id = unquote(nth(args, 0, ''))
    category = nth(args, 1, 'all')
    if not email_id:
        return tangelo.HTTPStatusCode(400,
                                      "invalid service call - missing email")

    stmt = (" select score from xref_email_topic_score "
            " where category_id = %s and email_id = %s "
            " order by idx ")

    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt, category, email_id) as qry:
            scores = []
            for row in qry.cursor():
                scores.append(head(row))
            tangelo.content_type("application/json")
            return {"scores": scores, "email": email_id, "category": category}
Example #33
0
def email_scores(*args):
    """Return the topic scores of one email within one category.

    Positional arguments: the URL-quoted email id, then an optional category
    id (default 'all').  A missing or empty email id yields an HTTP 400
    status object.  Scores are read from ``xref_email_topic_score`` ordered
    by idx, and ``head`` is applied to every row.

    Returns ``{"scores": [...], "email": email_id, "category": category}``
    and sets the response content type to JSON.
    """
    email_id = unquote(nth(args, 0, ''))
    category = nth(args, 1, 'all')
    if not email_id:
        return tangelo.HTTPStatusCode(400,
                                      "invalid service call - missing email")

    stmt = (" select score from xref_email_topic_score "
            " where category_id = %s and email_id = %s "
            " order by idx ")

    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt, category, email_id) as qry:
            scores = []
            for row in qry.cursor():
                scores.append(head(row))
            tangelo.content_type("application/json")
            return {"scores": scores, "email": email_id, "category": category}
Example #34
0
def getAttachmentsSender(*args):
    """List the emails with attachments that were sent by a given address.

    The sender is the URL-unquoted first positional argument; when it is
    missing or empty an HTTP 400 status object is returned.  String values
    are UTF-8 encoded and all other values are converted with ``str``.

    Returns ``{"sender": sender, "email_attachments": [row, ...]}`` and sets
    the response content type to JSON.
    """
    sender = urllib.unquote(nth(args, 0, ''))
    if not sender:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    tangelo.content_type("application/json")
    stmt = (
        " select id, dir, datetime, from_addr, tos, ccs, bccs, subject, attach, bodysize "
        " from email "
        " where from_addr = %s and attach != '' ")
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt, sender) as qry:
            attachments = []
            for row in qry.cursor():
                cells = []
                for val in row:
                    if isinstance(val, basestring):
                        cells.append(val.encode('utf-8'))
                    else:
                        cells.append(str(val))
                attachments.append(cells)
            return {"sender": sender, "email_attachments": attachments}
Example #35
0
def buildExportable(*args):
    """Bundle the directories of all exportable emails into a tar.gz download.

    Steps:
      1. Recreate the staging directory ``<webroot>/../tmp/newman_dl`` and
         make sure ``<webroot>/downloads/`` exists.
      2. Copy the directory of every email with exportable='true' from
         ``<webroot>/emails/<target>/`` into the staging directory.
      3. Archive the staging directory as ``export_<fmtNow()>.tar.gz`` in
         the tmp directory and move the archive into the downloads
         directory.

    Positional arguments are accepted but ignored.  Sets the response
    content type to JSON and returns ``{"file": "downloads/<name>.tar.gz"}``.
    """
    webroot = cherrypy.config.get("webroot")
    target = getOpt('target')
    base_src = "{}/emails/{}".format(webroot, target)
    tmp_dir = os.path.abspath("{}/../tmp/".format(webroot))
    download_dir = "{}/downloads/".format(webroot)
    tar_gz = "export_{}".format(fmtNow())
    base_dest = os.path.abspath("{}/../tmp/newman_dl".format(webroot))

    # Start from an empty staging directory on every call.
    if os.path.exists(base_dest):
        rmrf(base_dest)
    if not os.path.exists(download_dir):
        mkdir(download_dir)
    mkdir(base_dest)

    # Copy the directory of every exportable email; the id column is unused.
    stmt = (" SELECT id, dir FROM email WHERE exportable='true' ")
    tangelo.content_type("application/json")
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt) as qry:
            for _, val in qry.cursor():
                src = "{}/{}/".format(base_src, val)
                dest = "{}/{}/".format(base_dest, val)
                shutil.copytree(src, dest)

    # compress dir
    shutil.make_archive("{}/{}".format(tmp_dir, tar_gz),
                        "gztar",
                        root_dir=base_dest)

    # move to web downloads
    mv("{}/{}.tar.gz".format(tmp_dir, tar_gz),
       "{}/{}.tar.gz".format(download_dir, tar_gz))

    return {"file": "downloads/{}.tar.gz".format(tar_gz)}
Example #36
0
def buildExportable(*args):
    """Bundle the directories of all exportable emails into a tar.gz download.

    Steps:
      1. Recreate the staging directory ``<webroot>/../tmp/newman_dl`` and
         make sure ``<webroot>/downloads/`` exists.
      2. Copy the directory of every email with exportable='true' from
         ``<webroot>/emails/<target>/`` into the staging directory.
      3. Archive the staging directory as ``export_<fmtNow()>.tar.gz`` in
         the tmp directory and move the archive into the downloads
         directory.

    Positional arguments are accepted but ignored.  Sets the response
    content type to JSON and returns ``{"file": "downloads/<name>.tar.gz"}``.
    """
    webroot = cherrypy.config.get("webroot")
    target = getOpt('target')
    base_src = "{}/emails/{}".format(webroot, target)
    tmp_dir = os.path.abspath("{}/../tmp/".format(webroot))
    download_dir = "{}/downloads/".format(webroot)
    tar_gz = "export_{}".format(fmtNow())
    base_dest = os.path.abspath("{}/../tmp/newman_dl".format(webroot))

    # Start from an empty staging directory on every call.
    if os.path.exists(base_dest):
        rmrf(base_dest)
    if not os.path.exists(download_dir):
        mkdir(download_dir)
    mkdir(base_dest)

    # Copy the directory of every exportable email; the id column is unused.
    stmt = (" SELECT id, dir FROM email WHERE exportable='true' ")
    tangelo.content_type("application/json")
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt) as qry:
            for _, val in qry.cursor():
                src = "{}/{}/".format(base_src, val)
                dest = "{}/{}/".format(base_dest, val)
                shutil.copytree(src, dest)

    # compress dir
    shutil.make_archive("{}/{}".format(tmp_dir, tar_gz),
                        "gztar",
                        root_dir=base_dest)

    # move to web downloads
    mv("{}/{}.tar.gz".format(tmp_dir, tar_gz),
       "{}/{}.tar.gz".format(download_dir, tar_gz))

    return {"file": "downloads/{}.tar.gz".format(tar_gz)}
Example #37
0
def queryEntity(email):
    """Run ``stmt_email_entities_by_id`` for *email* and return all rows.

    The executed statement is logged.  The result is always a list; it is
    empty when the cursor yields nothing.
    """
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt_email_entities_by_id,
                           email) as result:
            tangelo.log("node-vals: %s" % result.stmt)
            return list(result.cursor())
Example #38
0
    " insert into xref_recipients (`from`, recipient, `type`, email_id)"
    " select f.obj as `from`, f2.obj as recipient, f2.predicate, f.subject as email_id"
    " from facts f join facts f2 "
    "     on f.subject = f2.subject"
    " where f.schema_name = 'email'"
    " and f2.schema_name = f.schema_name"
    " and f.predicate = 'from'"
    " and f2.predicate in ('to', 'cc', 'bcc')"
)

if __name__ == "__main__":

    # Script entry point.  No options are declared; parse_args() still
    # handles -h/--help.
    parser = argparse.ArgumentParser(description='Enrich Emails Communications')
    args= parser.parse_args()

    # One connection hands out transaction ids, the other runs the
    # statements and is committed after each step.
    with newman_connector() as read_cnx, newman_connector() as write_cnx:
        # Step 1: run stmt_sent_to with a new transaction id.
        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
        print "enrich sent to recipient communications"
        # NOTE(review): `facts` is not used again in this block -- confirm
        # whether constructing Fact is needed for its side effects.
        facts = Fact(write_cnx.conn(), autocommit=False)
        
        with execute_nonquery(write_cnx.conn(), stmt_sent_to, txid) as qry:
            pass
        write_cnx.commit()
    
        # Step 2: run stmt_total_recipients with another new transaction id.
        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
        print "enrich total sent to " 
        # NOTE(review): this step goes through execute_query while step 1
        # uses execute_nonquery -- confirm that is intentional.
        with execute_query(write_cnx.conn(), stmt_total_recipients, txid) as qry:
            pass
        write_cnx.commit()
Example #39
0
def lower(s):
    """Return *s* lower-cased, or '' when *s* is None or otherwise falsy."""
    if not s:
        return ''
    return s.lower()

if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Ingest Walker Email')
    parser.add_argument("input_tsv", help="input of tsv file")
    args= parser.parse_args()

    headers = ["id","threadid", "dir","category","datetime","from","tos","ccs","bccs","subject","body","tosize","ccsize","attachsize","attach","bodysize","location"]

    #skip header row for counting
    c = counter(-1)

    with newman_connector() as cnx:

        tx = Tx(cnx.conn()).next()
        print "tx: %s" % tx        
        fact = Fact(cnx.conn(), autocommit=False)

        for line in slurpA(args.input_tsv):
            try:
                count = c.next()
                if count % 1000 == 0:
                    print "ingested count - %s " % count
                row = line.split('\t')
                row = (c.strip() for c in row)
            
                num,dir,category,utc_date,importance,fromemail,ip,toemail,ccemail,bccemail,attach,messageid,inreplyto,references,subject,body = row
Example #40
0
def findLineNum(emailid):
    """Look up the line number recorded for *emailid*.

    Runs ``stmt_email_to_line_num`` with the id and returns ``head()`` of
    the first fetched row (presumably its first column).

    A stray buffer-flushing fragment used to follow the ``with`` block.  It
    referenced names that are not defined in this function and could only
    run after the block had already returned, so it has been removed.
    """
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt_email_to_line_num,
                           emailid) as qry:
            return head(qry.cursor().fetchone())


if __name__ == "__main__":

    # Script entry point: extract named entities from every (email_id, body)
    # row returned by the module-level ``stmt`` and hand the buffered
    # tab-separated lines to flush_buffer for "tmp/entity_ingest.tsv".
    print("loading NER model...")
    ner = named_entity_extractor('/srv/software/MITIE/MITIE-models/english/ner_model.dat')
    extract = partial(extract_entities, ner)

    print("\nTags output by this NER model: %s" % (ner.get_possible_ner_tags(),))
    c = counter(1)

    flush_entity = partial(flush_buffer, "tmp/entity_ingest.tsv")

    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt) as qry:
            buffer_entity = []

            for email_id, body in qry.cursor():
                count = c.next()
                # Progress message every 1000 emails.
                if count % 1000 == 0:
                    print("processed: %s " % count)
                extracted = extract(email_id, body)
                for i, (found_id, tag_name, entity, offset) in enumerate(extracted):
                    # The entity id combines the email id with the entity's
                    # position in the extraction result.
                    fields = [
                        "%s_entity_%s" % (found_id, i),
                        tag_name.lower(),
                        str(i),
                        entity,
                        found_id,
                        str(offset),
                    ]
                    buffer_entity.append("\t".join(fields))

            # All lines are buffered and flushed once, after the loop.
            flush_entity(buffer_entity)
Example #42
0
def findLineNum(emailid):
    """Look up the line number recorded for *emailid*.

    Runs ``stmt_email_to_line_num`` with the id and returns ``head()`` of
    the first fetched row (presumably its first column).
    """
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt_email_to_line_num,
                           emailid) as result:
            first_row = result.cursor().fetchone()
            return head(first_row)
Example #43
0
    "     group by id "
    "  ) as t2 "
    " on u.rollup_id = t2.id "
    " set u.total_emails = t2.c ")

# Fills xref_rollup_entity with the distinct (rollup_id, entity subject)
# pairs found by joining entity_rollup to entity on matching type and value.
stmt_populate_xref = (" insert into xref_rollup_entity (rollup_id, entity_id) "
                      " select distinct r.rollup_id, e.subject "
                      " from entity_rollup r join entity e "
                      " on r.type = e.entity_type and r.val = e.value ")

if __name__ == "__main__":

    # Script entry point.  No options are declared; parse_args() still
    # handles -h/--help.
    parser = argparse.ArgumentParser(description='Roll up enities')
    args = parser.parse_args()

    # (progress message, statement) pairs, executed in this order.
    steps = (
        ("rollup entities", stmt_rollup_entities),
        ("entity update email totals", stmt_update_rollup_counts),
        ("populate xref rollup to entity", stmt_populate_xref),
    )

    with newman_connector() as write_cnx:
        for label, statement in steps:
            print(label)
            with execute_query(write_cnx.conn(), statement) as qry:
                pass
            # Commit after every step, before the next one starts.
            write_cnx.commit()
Example #44
0
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Ingest Walker Email')
    parser.add_argument("input_tsv", help="input of tsv file")
    args = parser.parse_args()

    headers = [
        "id", "threadid", "dir", "category", "datetime", "from", "tos", "ccs",
        "bccs", "subject", "body", "tosize", "ccsize", "attachsize", "attach",
        "bodysize", "location"
    ]

    #skip header row for counting
    c = counter(-1)

    with newman_connector() as cnx:

        tx = Tx(cnx.conn()).next()
        print "tx: %s" % tx
        fact = Fact(cnx.conn(), autocommit=False)

        for line in slurpA(args.input_tsv):
            try:
                count = c.next()
                if count % 1000 == 0:
                    print "ingested count - %s " % count
                row = line.split('\t')
                row = (c.strip() for c in row)

                num, dir, category, utc_date, importance, fromemail, ip, toemail, ccemail, bccemail, attach, messageid, inreplyto, references, subject, body = row
Example #45
0
def findEmailId(line_num):
    """Look up the email id recorded for *line_num*.

    Runs ``stmt_line_num_to_email`` with the line number and returns
    ``head()`` of the first fetched row (presumably its first column).
    """
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt_line_num_to_email,
                           line_num) as result:
            first_row = result.cursor().fetchone()
            return head(first_row)
Example #46
0
def findEmailId(line_num):
    """Look up the email id recorded for *line_num*.

    Runs ``stmt_line_num_to_email`` with the line number and returns
    ``head()`` of the first fetched row (presumably its first column).
    """
    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt_line_num_to_email,
                           line_num) as result:
            first_row = result.cursor().fetchone()
            return head(first_row)
Example #47
0
        a = n.split() if n else []
        return ' '.join(a[:5])

    def formatScore(s):
        """Format *s* (a number or numeric string) as a percentage.

        The value is converted to float, multiplied by 100 and rendered with
        two decimals, right-aligned in a 10-character field, followed by '%'.
        """
        return "{:10.2f}%".format(float(s) * 100)

    def getTopics(_id):
        """Return one {'name', 'score'} dict per row of ``topic_stmt`` for *_id*.

        Column 0 of each row goes through ``formatName`` and column 1
        through ``formatScore``.
        """
        with newman_connector() as cnx:
            with execute_query(cnx.conn(), topic_stmt, _id) as qry:
                topics = []
                for row in qry.cursor():
                    topics.append({
                        'name': formatName(nth(row, 0)),
                        'score': formatScore(nth(row, 1)),
                    })
                return topics

    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt) as qry:
            for row in qry.cursor():
                _id, _dir, _from, tos, ccs, bccs, subject, _date, attachments, body = row
                outdir = "demail/emails/{}/{}".format(args.target, _dir)
                outfile = "{}/{}.html".format(outdir, last(split(_id, "/")))
                topics = getTopics(_id)

                o = {
                    'doc': {
                        'topics': topics,
                        'id': _id,
                        'from': _from,
                        'to': "; ".join(split(tos, ';')),
                        'cc': "; ".join(split(ccs, ';')),
                        'bcc': "; ".join(split(bccs, ';')),
Example #48
0
        " from facts as t1 join facts as t2 "
        "     on t1.subject = t2.subject and t1.schema_name = t2.schema_name "
        " where t1.schema_name = 'email' "
        " and t1.predicate = 'from' "
        " and t2.predicate in ('to', 'cc', 'bcc') "
        " group by t1.obj, t2.obj "
        " ) as bi_dir "
        " GROUP BY source, target "
        " ) as lvn "
        " group by source, target ")

    nodes = []
    node_map = {}
    edges = []

    with newman_connector() as cnx:
        with execute_query(cnx.conn(), stmt) as qry:
            c = counter()
            for row in qry.cursor():
                src, target, weight = row

                if src not in node_map:
                    node_map[src] = c.next()
                    nodes.append({
                        'name': src,
                        'community': 'n/a',
                        'idx': node_map[src]
                    })

                if target not in node_map:
                    node_map[target] = c.next()
Example #49
0
    " set u.total_emails = t2.c "   
)

# Fills xref_rollup_entity with the distinct (rollup_id, entity subject)
# pairs found by joining entity_rollup to entity on matching type and value.
stmt_populate_xref = (
   " insert into xref_rollup_entity (rollup_id, entity_id) "
   " select distinct r.rollup_id, e.subject "
   " from entity_rollup r join entity e "
   " on r.type = e.entity_type and r.val = e.value "
)

if __name__ == "__main__":

    # Script entry point.  No options are declared; parse_args() still
    # handles -h/--help.
    parser = argparse.ArgumentParser(description='Roll up enities')
    args = parser.parse_args()

    # (progress message, statement) pairs, executed in this order.
    steps = (
        ("rollup entities", stmt_rollup_entities),
        ("entity update email totals", stmt_update_rollup_counts),
        ("populate xref rollup to entity", stmt_populate_xref),
    )

    with newman_connector() as write_cnx:
        for label, statement in steps:
            print(label)
            with execute_query(write_cnx.conn(), statement) as qry:
                pass
            # Commit after every step, before the next one starts.
            write_cnx.commit()
Example #50
0
 def getTopics(_id):
     """Return one {'name', 'score'} dict per row of ``topic_stmt`` for *_id*.

     Column 0 of each row goes through ``formatName`` and column 1 through
     ``formatScore``.
     """
     with newman_connector() as cnx:
         with execute_query(cnx.conn(), topic_stmt, _id) as qry:
             topics = []
             for row in qry.cursor():
                 topics.append({
                     'name': formatName(nth(row, 0)),
                     'score': formatScore(nth(row, 1)),
                 })
             return topics