def writeRanks(ids):
    """Record a "rank" fact for every row produced by the ``stmt`` query.

    ids -- mapping consulted with the first element of each row; when that
           element is not a key, the string '0' is stored instead.

    Three separate connections are opened: one executes the module-level
    ``stmt`` query, one is handed to ``Tx`` to obtain the id printed as
    "tx", and one receives the facts.  The ``Fact`` writer is built with
    ``autocommit=False``; the only commit is the explicit one issued after
    the loop has finished.
    """
    # One merged ``with`` is equivalent to nesting the four managers in the
    # order listed, so acquisition and release order are unchanged.
    with newman_connector() as query_cnx, \
            newman_connector() as tx_cnx, \
            newman_connector() as fact_cnx, \
            execute_query(query_cnx.conn(), stmt) as result:
        tx_id = Tx(tx_cnx.conn()).next()
        # Single-argument print(...) emits the same text as the Python 2
        # print statement.
        print("tx: %s" % tx_id)

        fact_writer = Fact(fact_cnx.conn(), autocommit=False)
        print("assigning ranks")
        for row in result.cursor():
            # First element of the row is the fact subject (stored under the
            # "email_addr" type, so presumably an address -- confirm against
            # the definition of ``stmt``).
            address = row[0]
            fact_writer.addFact(
                address, "email_addr", "rank", ids.get(address, '0'), tx_id)

        print("commit")
        fact_cnx.commit()
for subgraph in clustering.subgraphs(): community_name = jsonGet(['name'], head(subgraph.vs['node']), 'n/a') for node in subgraph.vs['node']: node['community'] = community_name #output format #NODE\tCOMMUNITY # for node in nodes: # print "{}\t{}".format(node['name'], node['community']) count = counter(1) with newman_connector() as read_cnx, newman_connector() as write_cnx: txid = Tx(read_cnx.conn()).next() print "tx: %s" % txid facts = Fact(write_cnx.conn(), autocommit=False) print "assigning communities" for node in nodes: email_addr, community_id = node['name'], node['community'] facts.addFact(email_addr, "email_addr", "community", community_id, txid) facts.addFact(email_addr, "email_addr", "group_id", next(count), txid) print "commit" write_cnx.commit() txid = Tx(read_cnx.conn()).next() print "tx: %s" % txid print "assign community ids" stmt = (
args = parser.parse_args() headers = [ "id", "threadid", "dir", "category", "datetime", "from", "tos", "ccs", "bccs", "subject", "body", "tosize", "ccsize", "attachsize", "attach", "bodysize", "location" ] #skip header row for counting c = counter(-1) with newman_connector() as cnx: tx = Tx(cnx.conn()).next() print "tx: %s" % tx fact = Fact(cnx.conn(), autocommit=False) for line in slurpA(args.input_tsv): try: count = c.next() if count % 1000 == 0: print "ingested count - %s " % count row = line.split('\t') row = (c.strip() for c in row) num, dir, category, utc_date, importance, fromemail, ip, toemail, ccemail, bccemail, attach, messageid, inreplyto, references, subject, body = row fromemail = lower(fromemail) toemail = lower(toemail) ccemail = lower(ccemail) bccemail = lower(bccemail)