Esempio n. 1
0
def emailQueryObj(conn, field, args_array):
    """Pick the email query matching ``field`` and return its call tuple.

    The result is always ``(conn, statement, *bound_values)``; ``field`` is
    compared case-insensitively.

    * ``exportable`` -> no bound values
    * ``community``  -> first element of ``args_array``
    * ``topic``      -> first three elements (category, idx, score)
    * ``email``      -> first element, passed twice
    * ``entity``     -> first element
    * anything else  -> the text-filter statement when the first element is
      truthy, otherwise the unfiltered statement (neither takes bound values)

    The ``stmt_*`` statements are defined elsewhere in the module.
    """
    kind = field.lower()

    if kind == "exportable":
        return (conn, stmt_find_emails_filter_export)

    if kind == "community":
        community_id = head(args_array)
        return (conn, stmt_find_emails_filter_community, community_id)

    if kind == "topic":
        # TODO: verify args
        category, idx, score = args_array[:3]
        return (conn, stmt_find_emails_filter_topic_score, category, idx, score)

    # every remaining variant is driven by the first argument
    text = head(args_array)

    if kind == "email":
        # the address is bound twice in the statement
        return (conn, stmt_find_emails_filter_email_addr, text, text)

    if kind == "entity":
        return (conn, stmt_find_emails_filter_entity, text)

    if not text:
        # nothing to filter on: all emails
        return (conn, stmt_find_emails)

    return (conn, stmt_find_emails_filter_text)
Esempio n. 2
0
def createResults(field, args_array):
    """Assemble the email rows and the node/link graph for a search request.

    For any ``field`` other than "email" or "entity", a truthy first element
    of ``args_array`` is treated as free text: it is sent to Elasticsearch
    (index "newman", doc type "emails") and the hits are handed to
    ``ingestESTextResults`` before the rows and graph are built.

    Returns ``{'rows': [...], 'graph': {'nodes': [...], 'links': [...]}}``
    with the rows ordered by the string form of their 'datetime' value.
    """
    if field.lower() not in ["email", "entity"]:
        # free-text search
        text = head(args_array)
        if text:
            tangelo.log("text search : %s" % text)
            query_body = {"fields": ["_id"], "query": {"match_all": {}}}
            response = Elasticsearch().search(
                index="newman",
                doc_type="emails",
                size=1000,
                q=text,
                body=query_body,
            )
            hits = jsonGet(['hits', 'hits'], response, [])
            ingestESTextResults(hits)

    node_vals = getNodeVals(field, args_array)

    # each node is coloured by its group id; the same value is also written
    # back onto the node record under "color"
    colors = {}
    for name, attrs in node_vals.iteritems():
        colors[name] = attrs.get("group_id")
    for name in colors:
        node_vals[name]["color"] = colors.get(name)

    def by_datetime(row):
        return str(row.get('datetime'))

    emails = list(getEmails(colors, field, args_array))
    emails.sort(key=by_datetime)

    # a node's position in the list is the index the edges refer to
    idx_lookup = {}
    nodes = []
    for position, (name, attrs) in enumerate(node_vals.iteritems()):
        idx_lookup[name] = position
        nodes.append({
            "name": name,
            "num": attrs.get("num"),
            "rank": attrs.get("rank"),
            "group": attrs.get("color"),
            "community": attrs.get("comm_id"),
        })

    edges = getEdges(idx_lookup, field, args_array)

    return {'rows': emails, 'graph': {'nodes': nodes, 'links': edges}}
Esempio n. 3
0
def createRow(email_id, _dir, target_email, mail, categories, attach, msg_body):
    """Render one email as a single tab-separated line of sixteen columns.

    Columns, in order: email_id, _dir, categories (';'-joined), UTC date
    string or 'NODATE', an empty column, senders, an empty column, tos, ccs,
    bccs (each ';'-joined), attachments (';'-joined), first Message-ID,
    In-Reply-To values (','-joined), References (';'-joined), subject, body.

    ``mail`` must provide ``get_all(header, default)``; ``msg_body`` and the
    subject are quoted-printable decoded and then passed through
    ``clean_string``.
    """

    def scrub(value, strip_cr=False):
        # a fresh option list is built for every clean_string call
        opts = [EXPR_OPTS['fix_utf8'], EXPR_OPTS['fix_tab'], EXPR_OPTS['fix_newline']]
        if strip_cr:
            opts.append(EXPR_OPTS['fix_cr'])
        return clean_string(value, opts)

    def extract_addresses(raw_values):
        # commas are removed from the lowercased header values before parsing
        lowered = [clean_string(raw.lower(), [(r',', '')]) for raw in raw_values]
        # prefer the parsed address; fall back to the display name when only
        # the name contains an '@'; entries without any '@' are dropped
        picked = (
            address if '@' in address else display_name
            for display_name, address in getaddresses(lowered)
        )
        return [
            clean_string(entry.lower(), [(r'\'', '')])
            for entry in picked
            if '@' in entry
        ]

    def joined(separator, values):
        if not values:
            return ''
        return separator.join(values)

    def first_or_blank(values):
        if not values:
            return ''
        return head(values)

    message_ids = mail.get_all('message-id', None)
    in_reply_to = [scrub(value) for value in mail.get_all('in-reply-to', [])]
    references = [scrub(value) for value in mail.get_all('references', [])]
    dates = mail.get_all('date', None)
    subjects = mail.get_all('subject', [])

    # a 'mailer-daemon' sender is replaced by the target mailbox address
    senders = []
    for sender in extract_addresses(mail.get_all('from', [])):
        senders.append(target_email if sender == 'mailer-daemon' else sender)

    tos = extract_addresses(mail.get_all('to', []))
    ccs = extract_addresses(mail.get_all('cc', []))
    bccs = bccList(target_email, senders, tos, ccs,
                   extract_addresses(mail.get_all('bcc', [])))

    subject = scrub(quopri.decodestring(first_or_blank(subjects)), strip_cr=True)
    body = scrub(quopri.decodestring(msg_body), strip_cr=True)

    columns = [
        email_id,
        _dir,
        joined(";", categories),
        dateToUTCstr(head(dates)) if dates else 'NODATE',
        '',
        ";".join(senders),
        '',
        ";".join(tos),
        ";".join(ccs),
        ";".join(bccs),
        joined(";", attach),
        first_or_blank(message_ids),
        joined(",", in_reply_to),
        joined(";", references),
        subject,
        body,
    ]
    return "\t".join(columns)
Esempio n. 4
0
File: imap.py Progetto: mrG7/newman
def createRow(uid, email_dir, target_email, email, attach, msg_body):
    """Render one fetched message as a tab-separated line of sixteen columns.

    Columns, in order: uid, email_dir, an empty column, UTC date string or
    "NODATE", an empty column, senders, an empty column, tos, ccs, bccs (each
    ';'-joined), attachments (';'-joined), first Message-ID, In-Reply-To
    values (','-joined), an empty column, subject, body.

    ``email`` must provide ``get_all(header, default)``. Subject and body are
    quoted-printable decoded and flattened so they cannot break the row.
    """

    def addresses_of(header_name):
        # keep only the address part of every (name, address) pair
        return [address for _name, address in getaddresses(email.get_all(header_name, []))]

    def flatten(raw):
        # newlines become a visible marker, carriage returns are dropped, tabs
        # become spaces, and every non-ASCII character becomes a space
        decoded = quopri.decodestring(raw)
        single_line = decoded.replace("\n", "[:newline:]").replace("\r", "").replace("\t", " ")
        return re.sub(r"[^\x00-\x7F]", " ", single_line)

    def joined(separator, values):
        if not values:
            return ""
        return separator.join(values)

    message_ids = email.get_all("message-id", None)
    in_reply_to = email.get_all("in-reply-to", None)
    dates = email.get_all("date", None)
    subjects = email.get_all("subject", [])

    senders = addresses_of("from")
    tos = addresses_of("to")
    ccs = addresses_of("cc")
    bccs = bccList(target_email, senders, tos, ccs, addresses_of("bcc"))

    subject = flatten(head(subjects) if subjects else "")
    body = flatten(msg_body)

    columns = [
        uid,
        email_dir,
        "",
        dateToUTCstr(head(dates)) if dates else "NODATE",
        "",
        ";".join(senders),
        "",
        ";".join(tos),
        ";".join(ccs),
        ";".join(bccs),
        joined(";", attach),
        head(message_ids) if message_ids else "",
        joined(",", in_reply_to),
        "",
        subject,
        body,
    ]
    return "\t".join(columns)
Esempio n. 5
0
def createRow(uid, email_dir, target_email, email, attach, msg_body):
    """Build the tab-separated export line for a single message.

    The line has sixteen columns: uid, email_dir, '', UTC date string (or
    'NODATE' when there is no Date header), '', senders, '', tos, ccs, bccs,
    attachments, first Message-ID, In-Reply-To values, '', subject, body.
    Address and attachment lists are joined with ';', In-Reply-To with ','.

    ``email`` must provide ``get_all(header, default)``.
    """

    def header_addresses(header):
        # getaddresses yields (name, address) pairs; only the address is kept
        pairs = getaddresses(email.get_all(header, []))
        return [pair[1] for pair in pairs]

    def to_single_line(encoded):
        # decode quoted-printable, then neutralise everything that could
        # break the row: newlines, carriage returns, tabs, non-ASCII chars
        text = quopri.decodestring(encoded)
        text = text.replace('\n', '[:newline:]')
        text = text.replace('\r', '')
        text = text.replace('\t', ' ')
        return re.sub(r'[^\x00-\x7F]', ' ', text)

    def first(values):
        return head(values) if values else ''

    def sep_join(sep, values):
        return sep.join(values) if values else ''

    msgids = email.get_all('message-id', None)
    replies = email.get_all('in-reply-to', None)
    date_headers = email.get_all('date', None)
    subject_headers = email.get_all('subject', [])

    senders = header_addresses('from')
    tos = header_addresses('to')
    ccs = header_addresses('cc')
    bccs = bccList(target_email, senders, tos, ccs, header_addresses('bcc'))

    subject = to_single_line(first(subject_headers))
    body = to_single_line(msg_body)

    row = [uid, email_dir, '']
    row.append(dateToUTCstr(head(date_headers)) if date_headers else 'NODATE')
    row.append('')
    row.append(";".join(senders))
    row.append('')
    row.extend([";".join(tos), ";".join(ccs), ";".join(bccs)])
    row.append(sep_join(";", attach))
    row.append(first(msgids))
    row.append(sep_join(",", replies))
    row.extend(['', subject, body])
    return "\t".join(row)
Esempio n. 6
0
def createRow(uid, email_dir, target_email, email, attach, msg_body):
    """Serialize one message into a sixteen-column, tab-separated record.

    Column order: uid, email_dir, '', date (UTC string, or 'NODATE' when the
    Date header is missing), '', senders, '', tos, ccs, bccs, attachments,
    first Message-ID, In-Reply-To values, '', subject, body. Lists of
    addresses and attachments use ';' as separator, In-Reply-To uses ','.

    ``email`` must provide ``get_all(header, default)``.
    """

    def only_addresses(values):
        found = []
        for _display_name, address in getaddresses(values):
            found.append(address)
        return found

    def or_blank(values, render):
        # empty or missing header values always render as an empty column
        if values:
            return render(values)
        return ''

    def sanitize(encoded):
        # decode quoted-printable, replace newlines with a marker, drop
        # carriage returns, turn tabs and non-ASCII characters into spaces
        decoded = quopri.decodestring(encoded)
        for old, new in (('\n', '[:newline:]'), ('\r', ''), ('\t', ' ')):
            decoded = decoded.replace(old, new)
        return re.sub(r'[^\x00-\x7F]', ' ', decoded)

    msgid = email.get_all('message-id', None)
    inreplyto = email.get_all('in-reply-to', None)
    mail_date = email.get_all('date', None)
    raw_subject = email.get_all('subject', [])

    senders = only_addresses(email.get_all('from', []))
    tos = only_addresses(email.get_all('to', []))
    ccs = only_addresses(email.get_all('cc', []))
    bcc_candidates = only_addresses(email.get_all('bcc', []))
    bccs = bccList(target_email, senders, tos, ccs, bcc_candidates)

    subject = sanitize(or_blank(raw_subject, head))
    body = sanitize(msg_body)

    if mail_date:
        date_column = dateToUTCstr(head(mail_date))
    else:
        date_column = 'NODATE'

    return "\t".join([
        uid,
        email_dir,
        '',
        date_column,
        '',
        ";".join(senders),
        '',
        ";".join(tos),
        ";".join(ccs),
        ";".join(bccs),
        or_blank(attach, ";".join),
        or_blank(msgid, head),
        or_blank(inreplyto, ",".join),
        '',
        subject,
        body,
    ])
Esempio n. 7
0
def email_scores(*args):
    """Return the topic scores stored for one email.

    Positional arguments: the URL-quoted email id, then an optional category
    (defaults to 'all'). A missing or empty email id yields an HTTP 400
    status object. Otherwise the response content type is set to JSON and a
    dict with the scores (ordered by idx), the email id and the category is
    returned.
    """
    email_id = unquote(nth(args, 0, ''))
    if not email_id:
        return tangelo.HTTPStatusCode(400,
                                      "invalid service call - missing email")
    category = nth(args, 1, 'all')

    stmt = (" select score from xref_email_topic_score "
            " where category_id = %s and email_id = %s "
            " order by idx ")

    with newman_connector() as read_cnx, \
            execute_query(read_cnx.conn(), stmt, category, email_id) as qry:
        # each row carries a single column: the score
        scores = []
        for row in qry.cursor():
            scores.append(head(row))
        tangelo.content_type("application/json")
        return {"scores": scores, "email": email_id, "category": category}
Esempio n. 8
0
def email_scores(*args):
    """Look up the per-topic scores of an email for one category.

    ``args[0]`` is the URL-quoted email id and ``args[1]`` the category,
    which falls back to 'all'. Without an email id an HTTP 400 status object
    is returned; otherwise the JSON content type is set and the scores
    (ordered by idx) are returned together with the email id and category.
    """
    requested_email = unquote(nth(args, 0, ''))
    requested_category = nth(args, 1, 'all')

    if requested_email:
        stmt = (
            " select score from xref_email_topic_score "
            " where category_id = %s and email_id = %s "
            " order by idx "
        )

        with newman_connector() as read_cnx:
            conn = read_cnx.conn()
            with execute_query(conn, stmt, requested_category, requested_email) as qry:
                # the first column of every row is the score
                scores = [head(row) for row in qry.cursor()]
                tangelo.content_type("application/json")
                return {
                    "scores": scores,
                    "email": requested_email,
                    "category": requested_category,
                }

    return tangelo.HTTPStatusCode(400, "invalid service call - missing email")
Esempio n. 9
0
                        'name': target,
                        'community': 'n/a',
                        'idx': node_map[target]
                    })

                edges.append((node_map[src], node_map[target]))

    g = igraph.Graph(len(nodes) + 1)
    g.add_edges(edges)
    g.vs['node'] = nodes

    g = g.as_undirected(mode='collapse')
    clustering = g.community_multilevel()

    for subgraph in clustering.subgraphs():
        community_name = jsonGet(['name'], head(subgraph.vs['node']), 'n/a')
        for node in subgraph.vs['node']:
            node['community'] = community_name

    #output format
    #NODE\tCOMMUNITY
    # for node in nodes:
    #     print "{}\t{}".format(node['name'], node['community'])

    count = counter(1)
    with newman_connector() as read_cnx, newman_connector() as write_cnx:
        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
        facts = Fact(write_cnx.conn(), autocommit=False)
        print "assigning communities"
        for node in nodes:
Esempio n. 10
0
from newman.utils.file import slurpA
from newman.utils.functions import head,last,nth

if __name__ == "__main__":
    # Rank the recipients that the address given as the first command-line
    # argument writes to most often and print up to 20 of them, one per line,
    # as "<score>:<address>". The score is the recipient's message count
    # divided by the count of the top recipient, so the first line is 1.00.

    recipients = {}

    SourceEmail = sys.argv[1]

    lines = slurpA("tmp/exploded.csv")
    for line in lines:
        # every row is expected to hold three tab-separated fields
        (dt, src, target) = line.strip().split('\t')

        # only count mail sent by SourceEmail to somebody else
        if src != SourceEmail or target == SourceEmail:
            continue

        recipients[target] = recipients.get(target, 0) + 1

    # highest count first, ties broken by address; keep the top 20
    ranked = sorted(recipients.items(), key=lambda x: (-x[1], x[0]))[:20]

    # When no row matched there is no top count to normalise against; the
    # previous code failed at this point instead of printing nothing.
    if ranked:
        top = float(nth(head(ranked), 1))
        step = 1.0 / top

        for k, v in ranked:
            # single-argument print(...) behaves the same on Python 2 and 3
            print("{0:.2f}:{1}".format((v * step), k))
Esempio n. 11
0
from newman.utils.file import slurpA
from newman.utils.functions import head, last, nth

if __name__ == "__main__":
    # Print the (at most) 20 addresses that receive the most mail from the
    # address passed as the first command-line argument. Each output line is
    # "<score>:<address>", where the score is the recipient's message count
    # relative to the most frequent recipient (who therefore scores 1.00).

    recipients = {}

    SourceEmail = sys.argv[1]

    lines = slurpA("tmp/exploded.csv")
    for line in lines:
        # every row is expected to hold three tab-separated fields
        (dt, src, target) = line.strip().split('\t')

        # skip rows from other senders and mail the source sent to itself
        if src != SourceEmail or target == SourceEmail:
            continue

        recipients[target] = recipients.get(target, 0) + 1

    # sort by descending count, then by address, and keep the first 20
    ranked = sorted(recipients.items(), key=lambda x: (-x[1], x[0]))[:20]

    # With no matching rows there is nothing to normalise against; guard the
    # scaling so the script prints nothing instead of failing.
    if ranked:
        top = float(nth(head(ranked), 1))
        step = 1.0 / top

        for k, v in ranked:
            # single-argument print(...) behaves the same on Python 2 and 3
            print("{0:.2f}:{1}".format((v * step), k))
Esempio n. 12
0
def findEmailId(line_num):
    """Return the first column of the first row matching ``line_num``.

    Runs ``stmt_line_num_to_email`` with ``line_num`` as its only bound
    value on a fresh read connection.
    NOTE(review): ``fetchone()`` presumably yields None when nothing matches;
    what ``head`` does with that is not visible here - confirm.
    """
    with newman_connector() as read_cnx:
        conn = read_cnx.conn()
        with execute_query(conn, stmt_line_num_to_email, line_num) as qry:
            first_row = qry.cursor().fetchone()
            return head(first_row)
Esempio n. 13
0
                    node_map[target] = c.next()
                    nodes.append({'name': target, 
                                  'community': 'n/a', 
                                  'idx': node_map[target] })

                edges.append((node_map[src], node_map[target]))

    g = igraph.Graph(len(nodes)+1)
    g.add_edges(edges)
    g.vs['node'] = nodes

    g = g.as_undirected(mode='collapse')
    clustering = g.community_multilevel()

    for subgraph in clustering.subgraphs():
        community_name = jsonGet(['name'], head(subgraph.vs['node']), 'n/a')
        for node in subgraph.vs['node']:
            node['community'] = community_name

    #output format 
    #NODE\tCOMMUNITY
    # for node in nodes:
    #     print "{}\t{}".format(node['name'], node['community'])

    count = counter(1)
    with newman_connector() as read_cnx, newman_connector() as write_cnx:
        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
        facts = Fact(write_cnx.conn(), autocommit=False)
        print "assigning communities"
        for node in nodes:
Esempio n. 14
0
def createRow(email_id, _dir, target_email, mail, categories, attach,
              msg_body):
    """Serialize a parsed email into one tab-separated, sixteen-column row.

    Column order: email_id, _dir, categories, date (UTC string, or 'NODATE'
    without a Date header), '', senders, '', tos, ccs, bccs, attachments,
    first Message-ID, In-Reply-To values, References, subject, body.
    In-Reply-To values are joined with ','; every other list with ';'.

    ``mail`` must provide ``get_all(header, default)``; the subject and
    ``msg_body`` are quoted-printable decoded and then cleaned with
    ``clean_string``.
    """

    def clean_header(value):
        return clean_string(value, [
            EXPR_OPTS['fix_utf8'],
            EXPR_OPTS['fix_tab'],
            EXPR_OPTS['fix_newline'],
        ])

    def clean_text(value):
        # same as clean_header, plus carriage-return handling
        return clean_string(value, [
            EXPR_OPTS['fix_utf8'],
            EXPR_OPTS['fix_tab'],
            EXPR_OPTS['fix_newline'],
            EXPR_OPTS['fix_cr'],
        ])

    def parse_addresses(header_values):
        # lowercase and drop commas before handing the values to getaddresses
        prepared = []
        for value in header_values:
            prepared.append(clean_string(value.lower(), [(r',', '')]))

        kept = []
        for display_name, address in getaddresses(prepared):
            # use the address when it looks like one, otherwise fall back to
            # a display name that does; skip the entry if neither has an '@'
            if '@' in address:
                kept.append(address)
                continue
            if '@' in display_name:
                kept.append(display_name)

        return [clean_string(item.lower(), [(r'\'', '')]) for item in kept]

    def joined_or_blank(sep, values):
        return sep.join(values) if values else ''

    def head_or_blank(values):
        return head(values) if values else ''

    msgid = mail.get_all('message-id', None)
    inreplyto = [clean_header(v) for v in mail.get_all('in-reply-to', [])]
    references = [clean_header(v) for v in mail.get_all('references', [])]
    mail_date = mail.get_all('date', None)
    raw_subject = mail.get_all('subject', [])

    # mail from 'mailer-daemon' is attributed to the target mailbox
    senders = [
        target_email if parsed == 'mailer-daemon' else parsed
        for parsed in parse_addresses(mail.get_all('from', []))
    ]
    tos = parse_addresses(mail.get_all('to', []))
    ccs = parse_addresses(mail.get_all('cc', []))
    bcc_candidates = parse_addresses(mail.get_all('bcc', []))
    bccs = bccList(target_email, senders, tos, ccs, bcc_candidates)

    subject = clean_text(quopri.decodestring(head_or_blank(raw_subject)))
    body = clean_text(quopri.decodestring(msg_body))

    row = [email_id, _dir, joined_or_blank(";", categories)]
    row.append(dateToUTCstr(head(mail_date)) if mail_date else 'NODATE')
    row.append('')
    row.append(";".join(senders))
    row.append('')
    row.append(";".join(tos))
    row.append(";".join(ccs))
    row.append(";".join(bccs))
    row.append(joined_or_blank(";", attach))
    row.append(head_or_blank(msgid))
    row.append(joined_or_blank(",", inreplyto))
    row.append(joined_or_blank(";", references))
    row.extend([subject, body])
    return "\t".join(row)
Esempio n. 15
0
def findEmailId(line_num):
    """Run ``stmt_line_num_to_email`` for ``line_num`` and return ``head()``
    of the first fetched row.

    NOTE(review): behaviour when no row matches depends on ``head`` and the
    cursor's ``fetchone()``, neither of which is visible here - confirm.
    """
    with newman_connector() as read_cnx, \
            execute_query(read_cnx.conn(), stmt_line_num_to_email, line_num) as qry:
        cursor = qry.cursor()
        return head(cursor.fetchone())
Esempio n. 16
0
def findLineNum(emailid):
    """Return the first column of the first row matching ``emailid``.

    Runs ``stmt_email_to_line_num`` with ``emailid`` as its only bound value
    on a fresh read connection.
    NOTE(review): ``fetchone()`` presumably yields None when nothing matches;
    what ``head`` does with that is not visible here - confirm.
    """
    with newman_connector() as read_cnx:
        conn = read_cnx.conn()
        with execute_query(conn, stmt_email_to_line_num, emailid) as qry:
            first_row = qry.cursor().fetchone()
            return head(first_row)
Esempio n. 17
0
def findLineNum(emailid):
    """Run ``stmt_email_to_line_num`` for ``emailid`` and return ``head()``
    of the first fetched row.

    NOTE(review): behaviour when no row matches depends on ``head`` and the
    cursor's ``fetchone()``, neither of which is visible here - confirm.
    """
    with newman_connector() as read_cnx, \
            execute_query(read_cnx.conn(), stmt_email_to_line_num, emailid) as qry:
        cursor = qry.cursor()
        return head(cursor.fetchone())