Ejemplo n.º 1
0
def writeRanks(ids):
    """Record a "rank" fact for every row produced by the module-level ``stmt``.

    ids -- mapping looked up with ``ids.get(key, '0')``; the key is the first
           column of each result row, and rows without an entry get '0'.

    Three connections are opened: one runs the query, one supplies the
    transaction id, and one receives the facts.  The Fact writer is created
    with autocommit=False, so the write connection is committed explicitly
    after the last row.
    """
    with newman_connector() as query_cnx:
        with newman_connector() as tx_cnx:
            with newman_connector() as fact_cnx:
                with execute_query(query_cnx.conn(), stmt) as result:
                    # One transaction id is shared by every fact written below.
                    txid = Tx(tx_cnx.conn()).next()
                    print("tx: %s" % txid)
                    writer = Fact(fact_cnx.conn(), autocommit=False)
                    print("assigning ranks")
                    for row in result.cursor():
                        subject = row[0]
                        rank = ids.get(subject, '0')
                        writer.addFact(subject, "email_addr", "rank", rank,
                                       txid)

                    print("commit")
                    fact_cnx.commit()
Ejemplo n.º 2
0
def writeRanks(ids):
    """Write one "rank" fact per row returned by the module-level ``stmt``.

    ids -- mapping queried as ``ids.get(key, '0')``, where the key is column 0
           of each result row; missing keys are stored with rank '0'.

    Uses three separate connections (query, transaction id, fact writes).
    Facts are added with autocommit disabled and committed once at the end.
    """
    with newman_connector() as source_cnx:
        with newman_connector() as id_cnx:
            with newman_connector() as sink_cnx:
                with execute_query(source_cnx.conn(), stmt) as rows:
                    # All facts from this run carry the same transaction id.
                    txid = Tx(id_cnx.conn()).next()
                    print("tx: %s" % txid)
                    fact_writer = Fact(sink_cnx.conn(), autocommit=False)
                    print("assigning ranks")
                    for record in rows.cursor():
                        key = record[0]
                        fact_writer.addFact(
                            key, "email_addr", "rank", ids.get(key, '0'), txid)

                    print("commit")
                    sink_cnx.commit()
Ejemplo n.º 3
0
    # Label every node with a community name, one name per subgraph of the
    # clustering.  The name is read with jsonGet(['name'], ...) from
    # head(subgraph.vs['node']); 'n/a' is passed as the last argument
    # (presumably the fallback when no name is found -- confirm in jsonGet).
    for subgraph in clustering.subgraphs():
        community_name = jsonGet(['name'], head(subgraph.vs['node']), 'n/a')
        for node in subgraph.vs['node']:
            node['community'] = community_name

    #output format
    #NODE\tCOMMUNITY
    # for node in nodes:
    #     print "{}\t{}".format(node['name'], node['community'])

    # Source of the values stored under "group_id"; created with 1 as its
    # argument (presumably the starting value -- confirm in counter).
    count = counter(1)
    # read_cnx is used only to obtain transaction ids; write_cnx receives the
    # facts and is committed explicitly.
    with newman_connector() as read_cnx, newman_connector() as write_cnx:
        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
        # autocommit=False: the facts added below are committed by the
        # explicit write_cnx.commit() after the loop.
        facts = Fact(write_cnx.conn(), autocommit=False)
        print "assigning communities"
        for node in nodes:
            email_addr, community_id = node['name'], node['community']
            # Two facts per node, both under the same transaction id: the
            # community name and the next value drawn from `count`.
            facts.addFact(email_addr, "email_addr", "community", community_id,
                          txid)
            facts.addFact(email_addr, "email_addr", "group_id", next(count),
                          txid)

        print "commit"
        write_cnx.commit()

        # A fresh transaction id is drawn for the following step.
        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
        print "assign community ids"
        stmt = (
Ejemplo n.º 4
0
    # Parse the command line; args.input_tsv is read below.
    args = parser.parse_args()

    # NOTE(review): `headers` is not referenced in the visible part of this
    # script, and its 17 names do not match the 16 fields unpacked from each
    # row below -- confirm whether it is still needed.
    headers = [
        "id", "threadid", "dir", "category", "datetime", "from", "tos", "ccs",
        "bccs", "subject", "body", "tosize", "ccsize", "attachsize", "attach",
        "bodysize", "location"
    ]

    #skip header row for counting
    # (presumably counter(-1) starts at -1 so the header line is not counted
    # -- confirm in counter)
    c = counter(-1)

    # A single connection supplies the transaction id and receives the facts.
    with newman_connector() as cnx:

        tx = Tx(cnx.conn()).next()
        print "tx: %s" % tx
        # autocommit=False is passed; no commit appears in the visible part.
        fact = Fact(cnx.conn(), autocommit=False)

        for line in slurpA(args.input_tsv):
            try:
                count = c.next()
                # Progress message whenever the count is a multiple of 1000.
                if count % 1000 == 0:
                    print "ingested count - %s " % count
                row = line.split('\t')
                # The generator's loop variable reuses the name `c`; a
                # generator expression has its own scope, so the counter
                # bound to `c` above is not rebound.
                row = (c.strip() for c in row)

                # Exactly 16 tab-separated fields are required; any other
                # number raises ValueError here, inside the try whose handler
                # is outside the visible part.
                # NOTE(review): `dir` shadows the builtin of the same name.
                num, dir, category, utc_date, importance, fromemail, ip, toemail, ccemail, bccemail, attach, messageid, inreplyto, references, subject, body = row

                # The four address fields are passed through lower().
                fromemail = lower(fromemail)
                toemail = lower(toemail)
                ccemail = lower(ccemail)
                bccemail = lower(bccemail)
Ejemplo n.º 5
0
    # Give every node the community name of the subgraph it belongs to.  The
    # name is obtained with jsonGet(['name'], ...) from
    # head(subgraph.vs['node']); 'n/a' is the last argument (presumably the
    # default when the name is missing -- confirm in jsonGet).
    for subgraph in clustering.subgraphs():
        community_name = jsonGet(['name'], head(subgraph.vs['node']), 'n/a')
        for node in subgraph.vs['node']:
            node['community'] = community_name

    #output format 
    #NODE\tCOMMUNITY
    # for node in nodes:
    #     print "{}\t{}".format(node['name'], node['community'])

    # Supplies the values written as "group_id"; constructed with 1
    # (presumably its first value -- confirm in counter).
    count = counter(1)
    # Transaction ids come from read_cnx; facts go to write_cnx, which is
    # committed explicitly.
    with newman_connector() as read_cnx, newman_connector() as write_cnx:
        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
        # autocommit=False: the commit is issued by write_cnx.commit() below.
        facts = Fact(write_cnx.conn(), autocommit=False)
        print "assigning communities"
        for node in nodes:
            email_addr, community_id  = node['name'], node['community']
            # Each node gets a "community" fact and a "group_id" fact under
            # the same transaction id; group_id takes the next counter value.
            facts.addFact(email_addr, "email_addr", "community", community_id, txid) 
            facts.addFact(email_addr, "email_addr", "group_id", next(count), txid)

        print "commit"
        write_cnx.commit()

        # New transaction id for the step announced below.
        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
        print "assign community ids"
        stmt = (
            " insert into facts (subject, schema_name, predicate, obj, tx) "
            " select f.subject, f.schema_name, 'community_id', f2.obj, %s " 
Ejemplo n.º 6
0
    " where f.schema_name = 'email'"
    " and f2.schema_name = f.schema_name"
    " and f.predicate = 'from'"
    " and f2.predicate in ('to', 'cc', 'bcc')")

# Script entry point: runs a sequence of enrichment statements, each under
# its own transaction id and each followed by a commit on write_cnx.
if __name__ == "__main__":

    # No arguments are declared; parse_args() still handles -h/--help and
    # rejects unexpected arguments.
    parser = argparse.ArgumentParser(
        description='Enrich Emails Communications')
    args = parser.parse_args()

    # read_cnx is used only to draw transaction ids; the statements run on
    # write_cnx.
    with newman_connector() as read_cnx, newman_connector() as write_cnx:
        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
        print "enrich sent to recipient communications"
        # NOTE(review): `facts` is not used in the visible part of this
        # script -- confirm whether it is needed.
        facts = Fact(write_cnx.conn(), autocommit=False)

        # The statement is executed for its effect only, so the body of the
        # with-block is empty; txid is passed as the statement's parameter.
        with execute_nonquery(write_cnx.conn(), stmt_sent_to, txid) as qry:
            pass
        write_cnx.commit()

        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
        print "enrich total sent to "
        # NOTE(review): this step uses execute_query while the step above
        # uses execute_nonquery, although the result is likewise ignored and
        # followed by a commit -- confirm which helper is intended.
        with execute_query(write_cnx.conn(), stmt_total_recipients,
                           txid) as qry:
            pass
        write_cnx.commit()

        # Transaction id for the next step.
        txid = Tx(read_cnx.conn()).next()
        print "tx: %s" % txid
Ejemplo n.º 7
0
# Script entry point: reads the TSV file named on the command line and
# processes it line by line.
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Ingest Walker Email')
    parser.add_argument("input_tsv", help="input of tsv file")
    args= parser.parse_args()

    # NOTE(review): `headers` is not referenced in the visible part of this
    # script, and its 17 names do not match the 16 fields unpacked from each
    # row below -- confirm whether it is still needed.
    headers = ["id","threadid", "dir","category","datetime","from","tos","ccs","bccs","subject","body","tosize","ccsize","attachsize","attach","bodysize","location"]

    #skip header row for counting
    # (presumably counter(-1) starts at -1 so the header line is not counted
    # -- confirm in counter)
    c = counter(-1)

    # One connection provides the transaction id and receives the facts.
    with newman_connector() as cnx:

        tx = Tx(cnx.conn()).next()
        print "tx: %s" % tx        
        # autocommit=False is passed; no commit appears in the visible part.
        fact = Fact(cnx.conn(), autocommit=False)

        for line in slurpA(args.input_tsv):
            try:
                count = c.next()
                # Progress message whenever the count is a multiple of 1000.
                if count % 1000 == 0:
                    print "ingested count - %s " % count
                row = line.split('\t')
                # The generator's loop variable reuses the name `c`; a
                # generator expression has its own scope, so the counter
                # bound to `c` above is not rebound.
                row = (c.strip() for c in row)
            
                # Exactly 16 tab-separated fields are required; any other
                # number raises ValueError here, inside the try whose handler
                # is outside the visible part.
                # NOTE(review): `dir` shadows the builtin of the same name.
                num,dir,category,utc_date,importance,fromemail,ip,toemail,ccemail,bccemail,attach,messageid,inreplyto,references,subject,body = row

                # The four address fields are passed through lower().
                fromemail = lower(fromemail)
                toemail = lower(toemail)
                ccemail = lower(ccemail)
                bccemail = lower(bccemail)