def emailQueryObj(conn, field, args_array):
    """Pick the email query statement (and its parameters) for a filter field.

    ``field`` is compared case-insensitively.  The result is always a tuple
    whose first item is ``conn`` and whose second item is one of the
    ``stmt_find_emails*`` statements; any remaining items are the values
    taken from ``args_array`` that the statement is called with.
    """
    kind = field.lower()

    # filter by exportable
    if kind == "exportable":
        return (conn, stmt_find_emails_filter_export)

    # filter by community
    if kind == "community":
        return (conn, stmt_find_emails_filter_community, head(args_array))

    # filter by topic
    if kind == "topic":
        # todo verify args
        category, idx, score = args_array[:3]
        return (conn, stmt_find_emails_filter_topic_score, category, idx, score)

    # Every remaining filter works off the head of the argument list.
    text = head(args_array)

    # filter by email (the address is passed twice to the statement)
    if kind == "email":
        return (conn, stmt_find_emails_filter_email_addr, text, text)

    if kind == "entity":
        return (conn, stmt_find_emails_filter_entity, text)

    # filter by text
    if text:
        return (conn, stmt_find_emails_filter_text)

    # all
    return (conn, stmt_find_emails)
def createResults(field, args_array):
    """Assemble the email rows and node/link graph for a search request.

    For any field other than "email" or "entity" with a non-empty head
    argument, an Elasticsearch query is run first and its hits are passed
    to ``ingestESTextResults``.  The return value is a dict of the form
    ``{'rows': emails, 'graph': {'nodes': nodes, 'links': edges}}``.
    """
    # Anything that is not an email/entity lookup is a text search.
    if field.lower() not in ["email", "entity"]:
        text = head(args_array)
        if text:
            tangelo.log("text search : %s" % text)
            es = Elasticsearch()
            query_body = {"fields": ["_id"], "query": {"match_all": {}}}
            res = es.search(index="newman", doc_type="emails", size=1000,
                            q=text, body=query_body)
            ingestESTextResults(jsonGet(['hits', 'hits'], res, []))

    node_vals = getNodeVals(field, args_array)

    # Each node is coloured by its group id.
    colors = dict((name, vals.get("group_id"))
                  for name, vals in node_vals.iteritems())
    for name, vals in node_vals.iteritems():
        node_vals[name]["color"] = colors.get(name)

    # Rows are ordered by the string form of their 'datetime' value.
    emails = list(getEmails(colors, field, args_array))
    emails.sort(key=lambda row: str(row.get('datetime')))

    # Remember each node's position so edges can refer to nodes by index.
    idx_lookup = {}
    nodes = []
    for position, (name, vals) in enumerate(node_vals.iteritems()):
        idx_lookup[name] = position
        nodes.append({
            "name": name,
            "num": vals.get("num"),
            "rank": vals.get("rank"),
            "group": vals.get("color"),
            "community": vals.get("comm_id"),
        })

    edges = getEdges(idx_lookup, field, args_array)

    return {'rows': emails, 'graph': {'nodes': nodes, 'links': edges}}
def createRow(email_id, _dir, target_email, mail, categories, attach, msg_body):
    """Render one parsed mail message as a single tab-separated row.

    The 16 columns, in order: email id, directory, categories (';'),
    UTC date or 'NODATE', blank, senders (';'), blank, to (';'), cc (';'),
    bcc (';'), attachments (';'), message-id, in-reply-to (','),
    references (';'), subject, body.
    """
    def clean_header(value):
        # Header values get the utf8/tab/newline fixes only.
        return clean_string(value, [
            EXPR_OPTS['fix_utf8'],
            EXPR_OPTS['fix_tab'],
            EXPR_OPTS['fix_newline']])

    def clean_text(raw):
        # Subject and body are quoted-printable decoded, then also CR-fixed.
        return clean_string(quopri.decodestring(raw), [
            EXPR_OPTS['fix_utf8'],
            EXPR_OPTS['fix_tab'],
            EXPR_OPTS['fix_newline'],
            EXPR_OPTS['fix_cr']])

    def join_or_blank(sep, values):
        return sep.join(values) if values else ''

    def addrs(header_values):
        # Strip commas before parsing, keep whichever half of each parsed
        # pair contains an '@', then drop single quotes from the result.
        stripped = [clean_string(v.lower(), [(r',', '')]) for v in header_values]
        found = []
        for name, addr in getaddresses(stripped):
            if '@' in addr:
                found.append(addr)
            elif '@' in name:
                found.append(name)
        return [clean_string(a.lower(), [(r'\'', '')]) for a in found]

    msgid = mail.get_all('message-id', None)
    inreplyto = [clean_header(h) for h in mail.get_all('in-reply-to', [])]
    references = [clean_header(h) for h in mail.get_all('references', [])]
    mail_date = mail.get_all('date', None)
    subject_headers = mail.get_all('subject', [])

    # A 'mailer-daemon' sender is attributed to the target mailbox.
    senders = []
    for sender in addrs(mail.get_all('from', [])):
        senders.append(target_email if sender == 'mailer-daemon' else sender)
    tos = addrs(mail.get_all('to', []))
    ccs = addrs(mail.get_all('cc', []))
    bccs = bccList(target_email, senders, tos, ccs,
                   addrs(mail.get_all('bcc', [])))

    subject = clean_text(head(subject_headers) if subject_headers else '')
    body = clean_text(msg_body)

    if mail_date:
        date_col = dateToUTCstr(head(mail_date))
    else:
        date_col = 'NODATE'

    columns = [
        email_id,
        _dir,
        join_or_blank(";", categories),
        date_col,
        '',
        ";".join(senders),
        '',
        ";".join(tos),
        ";".join(ccs),
        ";".join(bccs),
        join_or_blank(";", attach),
        head(msgid) if msgid else '',
        join_or_blank(",", inreplyto),
        join_or_blank(";", references),
        subject,
        body,
    ]
    return "\t".join(columns)
def createRow(uid, email_dir, target_email, email, attach, msg_body):
    """Render one parsed email message as a single tab-separated row.

    The 16 columns, in order: uid, directory, blank, UTC date or 'NODATE',
    blank, senders (';'), blank, to (';'), cc (';'), bcc (';'),
    attachments (';'), message-id, in-reply-to (','), blank, subject, body.
    """
    def addrs(header_values):
        # Keep only the address half of each parsed (name, address) pair.
        return [addr for _name, addr in getaddresses(header_values)]

    def flatten(raw):
        # Quoted-printable decode, make the text single-line and tab-free,
        # then blank out every character outside the 0x00-0x7F range.
        decoded = quopri.decodestring(raw)
        decoded = decoded.replace("\n", "[:newline:]")
        decoded = decoded.replace("\r", "")
        decoded = decoded.replace("\t", " ")
        return re.sub(r"[^\x00-\x7F]", " ", decoded)

    def join_or_blank(sep, values):
        return sep.join(values) if values else ""

    msgid = email.get_all("message-id", None)
    inreplyto = email.get_all("in-reply-to", None)
    mail_date = email.get_all("date", None)
    subject_headers = email.get_all("subject", [])

    senders = addrs(email.get_all("from", []))
    tos = addrs(email.get_all("to", []))
    ccs = addrs(email.get_all("cc", []))
    bccs = bccList(target_email, senders, tos, ccs,
                   addrs(email.get_all("bcc", [])))

    subject = flatten(head(subject_headers) if subject_headers else "")
    body = flatten(msg_body)

    if mail_date:
        date_col = dateToUTCstr(head(mail_date))
    else:
        date_col = "NODATE"

    columns = [
        uid,
        email_dir,
        "",
        date_col,
        "",
        ";".join(senders),
        "",
        ";".join(tos),
        ";".join(ccs),
        ";".join(bccs),
        join_or_blank(";", attach),
        head(msgid) if msgid else "",
        join_or_blank(",", inreplyto),
        "",
        subject,
        body,
    ]
    return "\t".join(columns)
def createRow(uid, email_dir, target_email, email, attach, msg_body):
    """Build the tab-separated export row for one parsed email message.

    Column order: uid, directory, blank, UTC date or 'NODATE', blank,
    senders, blank, to, cc, bcc, attachments, message-id, in-reply-to,
    blank, subject, body.  Address and attachment lists are joined with
    ';' and in-reply-to values with ','.
    """
    def addrs(header_values):
        # Only the address part of each parsed (name, address) pair is kept.
        return [addr for _name, addr in getaddresses(header_values)]

    def flatten(raw):
        # Decode quoted-printable, encode newlines as a marker, drop CRs,
        # turn tabs into spaces, then replace non-ASCII characters by spaces.
        text = quopri.decodestring(raw)
        for old, new in (('\n', '[:newline:]'), ('\r', ''), ('\t', ' ')):
            text = text.replace(old, new)
        return re.sub(r'[^\x00-\x7F]', ' ', text)

    def joined(sep, values):
        return sep.join(values) if values else ''

    msgid = email.get_all('message-id', None)
    inreplyto = email.get_all('in-reply-to', None)
    mail_date = email.get_all('date', None)
    subject_headers = email.get_all('subject', [])

    senders = addrs(email.get_all('from', []))
    tos = addrs(email.get_all('to', []))
    ccs = addrs(email.get_all('cc', []))
    bccs = bccList(target_email, senders, tos, ccs,
                   addrs(email.get_all('bcc', [])))

    subject = flatten(head(subject_headers) if subject_headers else '')
    body = flatten(msg_body)

    date_col = dateToUTCstr(head(mail_date)) if mail_date else 'NODATE'
    msgid_col = head(msgid) if msgid else ''

    return "\t".join([
        uid,
        email_dir,
        '',
        date_col,
        '',
        ";".join(senders),
        '',
        ";".join(tos),
        ";".join(ccs),
        ";".join(bccs),
        joined(";", attach),
        msgid_col,
        joined(",", inreplyto),
        '',
        subject,
        body,
    ])
def createRow(uid, email_dir, target_email, email, attach, msg_body):
    """Serialize one parsed email message into a tab-separated row string.

    The row has 16 columns: uid, directory, blank, UTC date or 'NODATE',
    blank, senders, blank, to, cc, bcc, attachments, message-id,
    in-reply-to, blank, subject, body.  List-valued columns use ';' as the
    separator, except in-reply-to which uses ','.
    """
    def addrs(header_values):
        # getaddresses yields (name, address) pairs; only addresses are used.
        return [pair[1] for pair in getaddresses(header_values)]

    def flatten(raw):
        # Decode, collapse onto one tab-free line, then blank out any
        # character outside the 7-bit range.
        one_line = (quopri.decodestring(raw)
                    .replace('\n', '[:newline:]')
                    .replace('\r', '')
                    .replace('\t', ' '))
        return re.sub(r'[^\x00-\x7F]', ' ', one_line)

    def first_or_blank(values):
        return head(values) if values else ''

    def joined(sep, values):
        return sep.join(values) if values else ''

    msgid = email.get_all('message-id', None)
    inreplyto = email.get_all('in-reply-to', None)
    mail_date = email.get_all('date', None)
    subject_headers = email.get_all('subject', [])

    senders = addrs(email.get_all('from', []))
    tos = addrs(email.get_all('to', []))
    ccs = addrs(email.get_all('cc', []))
    bccs = bccList(target_email, senders, tos, ccs,
                   addrs(email.get_all('bcc', [])))

    subject = flatten(first_or_blank(subject_headers))
    body = flatten(msg_body)

    row = [uid, email_dir, '']
    row.append(dateToUTCstr(head(mail_date)) if mail_date else 'NODATE')
    row.extend(['', ";".join(senders), ''])
    row.extend([";".join(tos), ";".join(ccs), ";".join(bccs)])
    row.extend([joined(";", attach), first_or_blank(msgid),
                joined(",", inreplyto), ''])
    row.extend([subject, body])
    return "\t".join(row)
def email_scores(*args):
    """Return the topic scores stored for one email in one category.

    Positional arguments: the (URL-quoted) email id, then an optional
    category which defaults to 'all'.  A missing email id yields an HTTP
    400 status object.  Otherwise the response content type is set to JSON
    and a dict with keys "scores", "email" and "category" is returned,
    the scores being in ``idx`` order.
    """
    email_id = unquote(nth(args, 0, ''))
    category = nth(args, 1, 'all')

    if not email_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing email")

    stmt = (
        " select score from xref_email_topic_score "
        " where category_id = %s and email_id = %s "
        " order by idx "
    )

    scores = []
    with newman_connector() as read_cnx, \
            execute_query(read_cnx.conn(), stmt, category, email_id) as qry:
        # Only the head of each result row is kept.
        for row in qry.cursor():
            scores.append(head(row))

    tangelo.content_type("application/json")
    return {"scores": scores, "email": email_id, "category": category}
def email_scores(*args):
    """Look up the per-topic scores of an email for a given category.

    ``args[0]`` is the URL-quoted email id and ``args[1]`` the category
    (default 'all').  Without an email id an HTTP 400 status object is
    returned.  On success the content type is set to JSON and the result is
    ``{"scores": [...], "email": email_id, "category": category}``.
    """
    email_id = unquote(nth(args, 0, ''))
    category = nth(args, 1, 'all')

    if not email_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing email")

    stmt = (
        " select score from xref_email_topic_score "
        " where category_id = %s and email_id = %s "
        " order by idx "
    )

    with newman_connector() as read_cnx, \
            execute_query(read_cnx.conn(), stmt, category, email_id) as qry:
        # Each row contributes its head value, in the query's idx order.
        score_list = [head(record) for record in qry.cursor()]

    tangelo.content_type("application/json")
    response = {}
    response["scores"] = score_list
    response["email"] = email_id
    response["category"] = category
    return response
'name': target, 'community': 'n/a', 'idx': node_map[target] }) edges.append((node_map[src], node_map[target])) g = igraph.Graph(len(nodes) + 1) g.add_edges(edges) g.vs['node'] = nodes g = g.as_undirected(mode='collapse') clustering = g.community_multilevel() for subgraph in clustering.subgraphs(): community_name = jsonGet(['name'], head(subgraph.vs['node']), 'n/a') for node in subgraph.vs['node']: node['community'] = community_name #output format #NODE\tCOMMUNITY # for node in nodes: # print "{}\t{}".format(node['name'], node['community']) count = counter(1) with newman_connector() as read_cnx, newman_connector() as write_cnx: txid = Tx(read_cnx.conn()).next() print "tx: %s" % txid facts = Fact(write_cnx.conn(), autocommit=False) print "assigning communities" for node in nodes:
from newman.utils.file import slurpA
from newman.utils.functions import head,last,nth

if __name__ == "__main__":
    # Rank the recipients that the address given as the first command-line
    # argument sent mail to, and print the top 20 as "<score>:<recipient>",
    # where the score is the recipient's count divided by the highest count.
    recipients = {}
    SourceEmail = sys.argv[1]

    # Each line carries three tab-separated fields: date, sender, recipient.
    for line in slurpA("tmp/exploded.csv"):
        (dt, src, target) = line.strip().split('\t')
        # Only mail sent by the source address counts; skip mail to itself.
        if src != SourceEmail or target == SourceEmail:
            continue
        recipients[target] = recipients.get(target, 0) + 1

    # Highest count first; ties are broken alphabetically by recipient.
    ranked = sorted(recipients.items(), key=lambda x: (-x[1], x[0]))[:20]

    # With no matching recipients there is no top count to normalize by,
    # so print nothing instead of failing on the empty ranking.
    if ranked:
        top = float(nth(head(ranked), 1))
        step = 1.0 / top
        for k, v in ranked:
            print("{0:.2f}:{1}".format((v * step), k))
from newman.utils.file import slurpA
from newman.utils.functions import head, last, nth

if __name__ == "__main__":
    # Print the 20 most frequent recipients of the sender named by the first
    # command-line argument, one "<score>:<recipient>" per line.  Scores are
    # counts scaled so that the most frequent recipient gets 1.00.
    recipients = {}
    SourceEmail = sys.argv[1]

    # Input lines are tab-separated triples: date, sender, recipient.
    for line in slurpA("tmp/exploded.csv"):
        (dt, src, target) = line.strip().split('\t')
        # Ignore other senders and the sender's mail to its own address.
        if src != SourceEmail or target == SourceEmail:
            continue
        recipients[target] = recipients.get(target, 0) + 1

    # Sort by descending count, then by recipient name for stable ties.
    ranked = sorted(recipients.items(), key=lambda x: (-x[1], x[0]))[:20]

    # An empty ranking has no top count to scale by; emit nothing rather
    # than failing while computing the scale factor.
    if ranked:
        top = float(nth(head(ranked), 1))
        step = 1.0 / top
        for k, v in ranked:
            print("{0:.2f}:{1}".format((v * step), k))
def findEmailId(line_num):
    """Run ``stmt_line_num_to_email`` for ``line_num`` and return the head
    of the first row fetched."""
    with newman_connector() as read_cnx, \
            execute_query(read_cnx.conn(), stmt_line_num_to_email, line_num) as qry:
        first_row = qry.cursor().fetchone()
        return head(first_row)
node_map[target] = c.next() nodes.append({'name': target, 'community': 'n/a', 'idx': node_map[target] }) edges.append((node_map[src], node_map[target])) g = igraph.Graph(len(nodes)+1) g.add_edges(edges) g.vs['node'] = nodes g = g.as_undirected(mode='collapse') clustering = g.community_multilevel() for subgraph in clustering.subgraphs(): community_name = jsonGet(['name'], head(subgraph.vs['node']), 'n/a') for node in subgraph.vs['node']: node['community'] = community_name #output format #NODE\tCOMMUNITY # for node in nodes: # print "{}\t{}".format(node['name'], node['community']) count = counter(1) with newman_connector() as read_cnx, newman_connector() as write_cnx: txid = Tx(read_cnx.conn()).next() print "tx: %s" % txid facts = Fact(write_cnx.conn(), autocommit=False) print "assigning communities" for node in nodes:
def createRow(email_id, _dir, target_email, mail, categories, attach, msg_body):
    """Build the tab-separated export row for one parsed mail message.

    Column order: email id, directory, categories, UTC date or 'NODATE',
    blank, senders, blank, to, cc, bcc, attachments, message-id,
    in-reply-to, references, subject, body.  List columns are joined with
    ';' except in-reply-to, which uses ','.
    """
    def clean_header(value):
        # in-reply-to / references values: utf8, tab and newline fixes.
        return clean_string(value, [
            EXPR_OPTS['fix_utf8'],
            EXPR_OPTS['fix_tab'],
            EXPR_OPTS['fix_newline']
        ])

    def clean_text(raw):
        # Subject/body: quoted-printable decode, then the same fixes plus CR.
        return clean_string(quopri.decodestring(raw), [
            EXPR_OPTS['fix_utf8'],
            EXPR_OPTS['fix_tab'],
            EXPR_OPTS['fix_newline'],
            EXPR_OPTS['fix_cr']
        ])

    def joined(sep, values):
        return sep.join(values) if values else ''

    def addrs(header_values):
        # Commas are removed before parsing; of each parsed pair, whichever
        # part contains an '@' is kept (address preferred), and single
        # quotes are stripped from the kept value.
        prepared = [clean_string(v.lower(), [(r',', '')]) for v in header_values]
        kept = []
        for name, addr in getaddresses(prepared):
            if '@' in addr:
                kept.append(addr)
            elif '@' in name:
                kept.append(name)
        return [clean_string(k.lower(), [(r'\'', '')]) for k in kept]

    msgid = mail.get_all('message-id', None)
    inreplyto = [clean_header(h) for h in mail.get_all('in-reply-to', [])]
    references = [clean_header(h) for h in mail.get_all('references', [])]
    mail_date = mail.get_all('date', None)
    subject_headers = mail.get_all('subject', [])

    # Mail from 'mailer-daemon' is recorded as sent by the target mailbox.
    senders = [
        target_email if s == 'mailer-daemon' else s
        for s in addrs(mail.get_all('from', []))
    ]
    tos = addrs(mail.get_all('to', []))
    ccs = addrs(mail.get_all('cc', []))
    bccs = bccList(target_email, senders, tos, ccs,
                   addrs(mail.get_all('bcc', [])))

    subject = clean_text(head(subject_headers) if subject_headers else '')
    body = clean_text(msg_body)

    date_col = dateToUTCstr(head(mail_date)) if mail_date else 'NODATE'
    msgid_col = head(msgid) if msgid else ''

    row = [email_id, _dir, joined(";", categories), date_col]
    row.extend(['', ";".join(senders), ''])
    row.extend([";".join(tos), ";".join(ccs), ";".join(bccs)])
    row.extend([joined(";", attach), msgid_col])
    row.extend([joined(",", inreplyto), joined(";", references)])
    row.extend([subject, body])
    return "\t".join(row)
def findLineNum(emailid):
    """Run ``stmt_email_to_line_num`` for ``emailid`` and return the head
    of the first row fetched."""
    with newman_connector() as read_cnx, \
            execute_query(read_cnx.conn(), stmt_email_to_line_num, emailid) as qry:
        first_row = qry.cursor().fetchone()
        return head(first_row)