def main(input_dir, keyspace, output_table):
    """Load gzipped web-server access logs from *input_dir* into Cassandra.

    Every file in *input_dir* is opened as gzip text and read line by line.
    Lines matching the access-log pattern are inserted into
    ``keyspace.output_table`` as ``(id, host, datetime, path, bytes)`` rows,
    with a fresh time-based UUID as ``id``.  Lines that do not match the
    pattern are skipped.  The target table is truncated before loading.

    Args:
        input_dir: Directory containing the gzip-compressed log files.
        keyspace: Cassandra keyspace to connect to.
        output_table: Table (inside *keyspace*) to truncate and fill.

    Raises:
        ValueError: If a matching line carries a timestamp that does not
            parse as ``%d/%b/%Y:%H:%M:%S``.
    """
    # Maximum number of INSERTs sent to the cluster in one batch.
    batch_size = 300
    line_re = re.compile(
        r'^(\S+) - - \[(\S+) [+-]\d+\] \"[A-Z]+ (\S+) HTTP/\d\.\d\" \d+ (\d+)$')
    # NOTE: CQL cannot bind identifiers as parameters, so the table name is
    # concatenated into the statements; callers must pass trusted names.
    table = keyspace + '.' + output_table

    cluster = Cluster(['199.60.17.188', '199.60.17.216'])
    try:
        session = cluster.connect(keyspace)
        # Insert into the same table that gets truncated below (the table
        # name used to be hard-coded, ignoring ``output_table``).
        insert_row = session.prepare(
            "INSERT INTO " + table +
            " (id, host, datetime, path, bytes) VALUES (?, ?, ?, ?, ?)")
        batch = BatchStatement(consistency_level=ConsistencyLevel.ONE)
        session.execute("TRUNCATE " + table + ';')

        # Tracks the statements currently queued in ``batch``.  It lives
        # outside the file loop so that leftovers carried over from one file
        # still count towards the limit in the next file.
        pending = 0
        for file_name in os.listdir(input_dir):
            with gzip.open(os.path.join(input_dir, file_name), 'rt') as logfile:
                for line in logfile:
                    match = line_re.match(line)
                    if match is None:
                        continue
                    host, stamp, path, size = match.groups()
                    batch.add(insert_row, (
                        uuid.uuid1(),
                        host,
                        dt.datetime.strptime(stamp, '%d/%b/%Y:%H:%M:%S'),
                        path,
                        int(size),
                    ))
                    pending += 1
                    if pending >= batch_size:
                        session.execute(batch)
                        batch.clear()
                        pending = 0

        # Flush whatever is left; skip the round trip when nothing is queued.
        if pending:
            session.execute(batch)
    finally:
        # Release the driver's connections and background threads.
        cluster.shutdown()
def insert_batch(rec, session, table_name):
    """Insert parsed log records into *table_name* using batched writes.

    Args:
        rec: Iterable of iterables of records.  Each record is indexed as
            ``[0]`` host, ``[1]`` timestamp string in ``%d/%b/%Y:%H:%M:%S``
            format, ``[2]`` path, ``[3]`` byte count (anything ``int()``
            accepts).
        session: Connected Cassandra session used to prepare and execute
            the statements.
        table_name: Name of the target table.  CQL cannot bind identifiers
            as parameters, so the name is interpolated into the statement;
            callers must pass a trusted value.

    Raises:
        ValueError: If a timestamp or byte count cannot be parsed.
    """
    # Upper bound on statements per batch so a large input list cannot
    # produce an oversized batch request.
    max_batch_size = 100

    # Prepare once: the statement is identical for every record.
    insert_statement = session.prepare(
        "INSERT INTO %s(host,uid,datetime,path,bytes) VALUES (?,UUID(),?,?,?)" % table_name)
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    pending = 0

    for each_list in rec:
        for list_content in each_list:
            host = list_content[0]
            # Named ``timestamp`` to avoid shadowing the ``datetime`` module.
            timestamp = d.datetime.strptime(list_content[1], "%d/%b/%Y:%H:%M:%S")
            path = list_content[2]
            byte = int(list_content[3])
            batch.add(insert_statement, (host, timestamp, path, byte))
            pending += 1
            if pending >= max_batch_size:
                session.execute(batch)
                batch.clear()
                pending = 0

        # Commit the remainder of this list before moving to the next one.
        if pending:
            session.execute(batch)
            batch.clear()
            pending = 0
msg['event_type'] = 'event_profile_change' msg['event'] = {"profile_field_id": 'extid'} channel_basic_publish(msg) msg['event'] = { "profile_field_id": 'profile_proc_location_pred' } channel_basic_publish(msg) elif args.cassandra: events.append(event) # send events to cassandra if args.cassandra: txt = json.dumps(events).decode() batch.add(query, (txt, extid, platform)) if len(batch) == 100: try: casconn.session().execute(batch, timeout=60) except (cassandra.OperationTimedOut, cassandra.WriteTimeout) as e: time.sleep(30) # important, really useful casconn = cassanda_connection() casconn.session().execute(batch, timeout=60) batch.clear() # finally if len(batch): casconn.session().execute(batch, timeout=60)